LLVM 23.0.0git
CombinerHelper.cpp
Go to the documentation of this file.
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
42#include <cmath>
43#include <optional>
44#include <tuple>
45
46#define DEBUG_TYPE "gi-combiner"
47
48using namespace llvm;
49using namespace MIPatternMatch;
50
51// Option to allow testing of the combiner while no targets know about indexed
52// addressing.
53static cl::opt<bool>
54 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
55 cl::desc("Force all indexed operations to be "
56 "legal for the GlobalISel combiner"));
57
62 const LegalizerInfo *LI)
63 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
65 TII(Builder.getMF().getSubtarget().getInstrInfo()),
66 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
67 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
68 (void)this->VT;
69}
70
72 return *Builder.getMF().getSubtarget().getTargetLowering();
73}
74
76 return Builder.getMF();
77}
78
82
83LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
84
/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
///
/// In little endian order the in-memory position equals the byte index, so
/// \p ByteWidth is only consulted by the bounds assertion.
static unsigned littleEndianByteAt([[maybe_unused]] const unsigned ByteWidth,
                                   const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return I;
}
93
94/// Determines the LogBase2 value for a non-null input value using the
95/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
97 auto &MRI = *MIB.getMRI();
98 LLT Ty = MRI.getType(V);
99 auto Ctlz = MIB.buildCTLZ(Ty, V);
100 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
101 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
102}
103
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
///
/// Big endian order mirrors the byte index around the middle of the type, so
/// index 0 maps to the last byte and index ByteWidth - 1 maps to byte 0.
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return ByteWidth - I - 1;
}
112
113/// Given a map from byte offsets in memory to indices in a load/store,
114/// determine if that map corresponds to a little or big endian byte pattern.
115///
116/// \param MemOffset2Idx maps memory offsets to address offsets.
117/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
118///
119/// \returns true if the map corresponds to a big endian byte pattern, false if
120/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
121///
122/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
123/// are as follows:
124///
125/// AddrOffset Little endian Big endian
126/// 0 0 3
127/// 1 1 2
128/// 2 2 1
129/// 3 3 0
130static std::optional<bool>
132 int64_t LowestIdx) {
133 // Need at least two byte positions to decide on endianness.
134 unsigned Width = MemOffset2Idx.size();
135 if (Width < 2)
136 return std::nullopt;
137 bool BigEndian = true, LittleEndian = true;
138 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
139 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
140 if (MemOffsetAndIdx == MemOffset2Idx.end())
141 return std::nullopt;
142 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
143 assert(Idx >= 0 && "Expected non-negative byte offset?");
144 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
145 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
146 if (!BigEndian && !LittleEndian)
147 return std::nullopt;
148 }
149
150 assert((BigEndian != LittleEndian) &&
151 "Pattern cannot be both big and little endian!");
152 return BigEndian;
153}
154
156
157bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
158 assert(LI && "Must have LegalizerInfo to query isLegal!");
159 return LI->getAction(Query).Action == LegalizeActions::Legal;
160}
161
163 const LegalityQuery &Query) const {
164 return isPreLegalize() || isLegal(Query);
165}
166
168 return isLegal(Query) ||
169 LI->getAction(Query).Action == LegalizeActions::WidenScalar;
170}
171
173 const LegalityQuery &Query) const {
174 LegalizeAction Action = LI->getAction(Query).Action;
175 return Action == LegalizeActions::Legal ||
177}
178
180 if (!Ty.isVector())
181 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
182 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
183 if (isPreLegalize())
184 return true;
185 LLT EltTy = Ty.getElementType();
186 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
187 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
188}
189
191 Register ToReg) const {
192 Observer.changingAllUsesOfReg(MRI, FromReg);
193
194 if (MRI.constrainRegAttrs(ToReg, FromReg))
195 MRI.replaceRegWith(FromReg, ToReg);
196 else
197 Builder.buildCopy(FromReg, ToReg);
198
199 Observer.finishedChangingAllUsesOfReg();
200}
201
203 MachineOperand &FromRegOp,
204 Register ToReg) const {
205 assert(FromRegOp.getParent() && "Expected an operand in an MI");
206 Observer.changingInstr(*FromRegOp.getParent());
207
208 FromRegOp.setReg(ToReg);
209
210 Observer.changedInstr(*FromRegOp.getParent());
211}
212
214 unsigned ToOpcode) const {
215 Observer.changingInstr(FromMI);
216
217 FromMI.setDesc(Builder.getTII().get(ToOpcode));
218
219 Observer.changedInstr(FromMI);
220}
221
223 return RBI->getRegBank(Reg, MRI, *TRI);
224}
225
227 const RegisterBank *RegBank) const {
228 if (RegBank)
229 MRI.setRegBank(Reg, *RegBank);
230}
231
233 if (matchCombineCopy(MI)) {
235 return true;
236 }
237 return false;
238}
240 if (MI.getOpcode() != TargetOpcode::COPY)
241 return false;
242 Register DstReg = MI.getOperand(0).getReg();
243 Register SrcReg = MI.getOperand(1).getReg();
244 return canReplaceReg(DstReg, SrcReg, MRI);
245}
247 Register DstReg = MI.getOperand(0).getReg();
248 Register SrcReg = MI.getOperand(1).getReg();
249 replaceRegWith(MRI, DstReg, SrcReg);
250 MI.eraseFromParent();
251}
252
254 MachineInstr &MI, BuildFnTy &MatchInfo) const {
255 assert(MI.getOpcode() == TargetOpcode::G_FREEZE && "Invalid instruction");
256
257 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
258 Register DstOp = MI.getOperand(0).getReg();
259 Register OrigOp = MI.getOperand(1).getReg();
260
261 if (!MRI.hasOneNonDBGUse(OrigOp))
262 return false;
263
264 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
265 // Even if only a single operand of the PHI is not guaranteed non-poison,
266 // moving freeze() backwards across a PHI can cause optimization issues for
267 // other users of that operand.
268 //
269 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
270 // the source register is unprofitable because it makes the freeze() more
271 // strict than is necessary (it would affect the whole register instead of
272 // just the subreg being frozen).
273 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
274 return false;
275
276 if (canCreateUndefOrPoison(OrigOp, MRI,
277 /*ConsiderFlagsAndMetadata=*/false))
278 return false;
279
280 std::optional<MachineOperand> MaybePoisonOperand;
281 for (MachineOperand &Operand : OrigDef->uses()) {
282 if (!Operand.isReg())
283 return false;
284
285 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
286 continue;
287
288 if (!MaybePoisonOperand)
289 MaybePoisonOperand = Operand;
290 else {
291 // We have more than one maybe-poison operand. Moving the freeze is
292 // unsafe.
293 return false;
294 }
295 }
296
297 // Eliminate freeze if all operands are guaranteed non-poison.
298 if (!MaybePoisonOperand) {
299 MatchInfo = [=](MachineIRBuilder &B) {
300 Observer.changingInstr(*OrigDef);
301 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
302 Observer.changedInstr(*OrigDef);
303 B.buildCopy(DstOp, OrigOp);
304 };
305 return true;
306 }
307
308 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
309 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
310
312 {TargetOpcode::G_FREEZE, {MaybePoisonOperandRegTy}}))
313 return false;
314
315 MatchInfo = [=](MachineIRBuilder &B) mutable {
316 Observer.changingInstr(*OrigDef);
317 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
318 Observer.changedInstr(*OrigDef);
319 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
320 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
322 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
323 Freeze.getReg(0));
324 replaceRegWith(MRI, DstOp, OrigOp);
325 };
326 return true;
327}
328
331 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
332 "Invalid instruction");
333 bool IsUndef = true;
334 MachineInstr *Undef = nullptr;
335
336 // Walk over all the operands of concat vectors and check if they are
337 // build_vector themselves or undef.
338 // Then collect their operands in Ops.
339 for (const MachineOperand &MO : MI.uses()) {
340 Register Reg = MO.getReg();
341 MachineInstr *Def = MRI.getVRegDef(Reg);
342 assert(Def && "Operand not defined");
343 if (!MRI.hasOneNonDBGUse(Reg))
344 return false;
345 switch (Def->getOpcode()) {
346 case TargetOpcode::G_BUILD_VECTOR:
347 IsUndef = false;
348 // Remember the operands of the build_vector to fold
349 // them into the yet-to-build flattened concat vectors.
350 for (const MachineOperand &BuildVecMO : Def->uses())
351 Ops.push_back(BuildVecMO.getReg());
352 break;
353 case TargetOpcode::G_IMPLICIT_DEF: {
354 LLT OpType = MRI.getType(Reg);
355 // Keep one undef value for all the undef operands.
356 if (!Undef) {
357 Builder.setInsertPt(*MI.getParent(), MI);
358 Undef = Builder.buildUndef(OpType.getScalarType());
359 }
360 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
361 OpType.getScalarType() &&
362 "All undefs should have the same type");
363 // Break the undef vector in as many scalar elements as needed
364 // for the flattening.
365 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
366 EltIdx != EltEnd; ++EltIdx)
367 Ops.push_back(Undef->getOperand(0).getReg());
368 break;
369 }
370 default:
371 return false;
372 }
373 }
374
375 // Check if the combine is illegal
376 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
378 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
379 return false;
380 }
381
382 if (IsUndef)
383 Ops.clear();
384
385 return true;
386}
389 // We determined that the concat_vectors can be flattened.
390 // Generate the flattened build_vector.
391 Register DstReg = MI.getOperand(0).getReg();
392 Builder.setInsertPt(*MI.getParent(), MI);
393 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
394
395 // Note: IsUndef is sort of redundant. We could have determined it by
396 // checking that all Ops are undef. Alternatively, we could have
397 // generated a build_vector of undefs and relied on another combine to
398 // clean that up. For now, given we already gather this information
399 // in matchCombineConcatVectors, just save compile time and issue the
400 // right thing.
401 if (Ops.empty())
402 Builder.buildUndef(NewDstReg);
403 else
404 Builder.buildBuildVector(NewDstReg, Ops);
405 replaceRegWith(MRI, DstReg, NewDstReg);
406 MI.eraseFromParent();
407}
408
411 auto &BV = cast<GBuildVector>(MI);
412
413 // Look at the first operand for a unmerge(bitcast) from a scalar type.
414 GUnmerge *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
415 if (!Unmerge || Unmerge->getReg(0) != BV.getSourceReg(0))
416 return false;
417 MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
418 if (BC->getOpcode() != TargetOpcode::G_BITCAST)
419 return false;
420 LLT InputTy = MRI.getType(BC->getOperand(1).getReg());
421 unsigned Factor = Unmerge->getNumDefs();
422 if (!InputTy.isScalar() || BV.getNumSources() % Factor != 0)
423 return false;
424
425 // Check if the build_vector is legal
426 LLT BVDstTy = LLT::fixed_vector(BV.getNumSources() / Factor, InputTy);
427 if (!isLegal({TargetOpcode::G_BUILD_VECTOR, {BVDstTy, InputTy}}))
428 return false;
429
430 // Check all other operands are bitcasts or undef.
431 for (unsigned Idx = 0; Idx < BV.getNumSources(); Idx += Factor) {
432 GUnmerge *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(Idx), MRI);
433 if (!all_of(iota_range<unsigned>(0, Factor, false), [&](unsigned J) {
434 MachineInstr *Src = MRI.getVRegDef(BV.getSourceReg(Idx + J));
435 if (Src->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
436 return true;
437 return Unmerge && BV.getSourceReg(Idx + J) == Unmerge->getReg(J);
438 }))
439 return false;
440 if (!Unmerge)
441 Ops.push_back(0);
442 else {
443 MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
444 if (BC->getOpcode() != TargetOpcode::G_BITCAST ||
445 MRI.getType(BC->getOperand(1).getReg()) != InputTy)
446 return false;
447 Ops.push_back(BC->getOperand(1).getReg());
448 }
449 }
450
451 return true;
452}
453
456 LLT SrcTy = MRI.getType(Ops[0]);
457 // Build undef if any operations require it.
458 Register Undef = 0;
459 for (Register &Op : Ops) {
460 if (!Op) {
461 if (!Undef)
462 Undef = Builder.buildUndef(SrcTy).getReg(0);
463 Op = Undef;
464 }
465 }
466
467 LLT BVDstTy = LLT::fixed_vector(Ops.size(), SrcTy);
468 auto BV = Builder.buildBuildVector(BVDstTy, Ops);
469 Builder.buildBitcast(MI.getOperand(0).getReg(), BV);
470 MI.eraseFromParent();
471}
472
474 auto &Shuffle = cast<GShuffleVector>(MI);
475
476 Register SrcVec1 = Shuffle.getSrc1Reg();
477 Register SrcVec2 = Shuffle.getSrc2Reg();
478 LLT EltTy = MRI.getType(SrcVec1).getElementType();
479 int Width = MRI.getType(SrcVec1).getNumElements();
480
481 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
482 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
483
484 SmallVector<Register> Extracts;
485 // Select only applicable elements from unmerged values.
486 for (int Val : Shuffle.getMask()) {
487 if (Val == -1)
488 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
489 else if (Val < Width)
490 Extracts.push_back(Unmerge1.getReg(Val));
491 else
492 Extracts.push_back(Unmerge2.getReg(Val - Width));
493 }
494 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
495 if (Extracts.size() == 1)
496 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
497 else
498 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
499 MI.eraseFromParent();
500}
501
504 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
505 auto ConcatMI1 =
506 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
507 auto ConcatMI2 =
508 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
509 if (!ConcatMI1 || !ConcatMI2)
510 return false;
511
512 // Check that the sources of the Concat instructions have the same type
513 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
514 MRI.getType(ConcatMI2->getSourceReg(0)))
515 return false;
516
517 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
518 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
519 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
520 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
521 // Check if the index takes a whole source register from G_CONCAT_VECTORS
522 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
523 if (Mask[i] == -1) {
524 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
525 if (i + j >= Mask.size())
526 return false;
527 if (Mask[i + j] != -1)
528 return false;
529 }
531 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
532 return false;
533 Ops.push_back(0);
534 } else if (Mask[i] % ConcatSrcNumElt == 0) {
535 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
536 if (i + j >= Mask.size())
537 return false;
538 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
539 return false;
540 }
541 // Retrieve the source register from its respective G_CONCAT_VECTORS
542 // instruction
543 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
544 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
545 } else {
546 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
547 ConcatMI1->getNumSources()));
548 }
549 } else {
550 return false;
551 }
552 }
553
555 {TargetOpcode::G_CONCAT_VECTORS,
556 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
557 return false;
558
559 return !Ops.empty();
560}
561
564 LLT SrcTy;
565 for (Register &Reg : Ops) {
566 if (Reg != 0)
567 SrcTy = MRI.getType(Reg);
568 }
569 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
570
571 Register UndefReg = 0;
572
573 for (Register &Reg : Ops) {
574 if (Reg == 0) {
575 if (UndefReg == 0)
576 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
577 Reg = UndefReg;
578 }
579 }
580
581 if (Ops.size() > 1)
582 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
583 else
584 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
585 MI.eraseFromParent();
586}
587
590 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
591 "Invalid instruction kind");
592 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
593 Register Src1 = MI.getOperand(1).getReg();
594 LLT SrcType = MRI.getType(Src1);
595
596 unsigned DstNumElts = DstType.getNumElements();
597 unsigned SrcNumElts = SrcType.getNumElements();
598
599 // If the resulting vector is smaller than the size of the source
600 // vectors being concatenated, we won't be able to replace the
601 // shuffle vector into a concat_vectors.
602 //
603 // Note: We may still be able to produce a concat_vectors fed by
604 // extract_vector_elt and so on. It is less clear that would
605 // be better though, so don't bother for now.
606 //
607 // If the destination is a scalar, the size of the sources doesn't
608 // matter: we will lower the shuffle to a plain copy. This will
609 // work only if the source and destination have the same size. But
610 // that's covered by the next condition.
611 //
612 // TODO: If the sizes of the source and destination don't match
613 // we could still emit an extract vector element in that case.
614 if (DstNumElts < 2 * SrcNumElts)
615 return false;
616
617 // Check that the shuffle mask can be broken evenly between the
618 // different sources.
619 if (DstNumElts % SrcNumElts != 0)
620 return false;
621
622 // Mask length is a multiple of the source vector length.
623 // Check if the shuffle is some kind of concatenation of the input
624 // vectors.
625 unsigned NumConcat = DstNumElts / SrcNumElts;
626 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
627 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
628 for (unsigned i = 0; i != DstNumElts; ++i) {
629 int Idx = Mask[i];
630 // Undef value.
631 if (Idx < 0)
632 continue;
633 // Ensure the indices in each SrcType sized piece are sequential and that
634 // the same source is used for the whole piece.
635 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
636 (ConcatSrcs[i / SrcNumElts] >= 0 &&
637 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
638 return false;
639 // Remember which source this index came from.
640 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
641 }
642
643 // The shuffle is concatenating multiple vectors together.
644 // Collect the different operands for that.
645 Register UndefReg;
646 Register Src2 = MI.getOperand(2).getReg();
647 for (auto Src : ConcatSrcs) {
648 if (Src < 0) {
649 if (!UndefReg) {
650 Builder.setInsertPt(*MI.getParent(), MI);
651 UndefReg = Builder.buildUndef(SrcType).getReg(0);
652 }
653 Ops.push_back(UndefReg);
654 } else if (Src == 0)
655 Ops.push_back(Src1);
656 else
657 Ops.push_back(Src2);
658 }
659 return true;
660}
661
663 ArrayRef<Register> Ops) const {
664 Register DstReg = MI.getOperand(0).getReg();
665 Builder.setInsertPt(*MI.getParent(), MI);
666 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
667
668 if (Ops.size() == 1)
669 Builder.buildCopy(NewDstReg, Ops[0]);
670 else
671 Builder.buildMergeLikeInstr(NewDstReg, Ops);
672
673 replaceRegWith(MRI, DstReg, NewDstReg);
674 MI.eraseFromParent();
675}
676
677namespace {
678
679/// Select a preference between two uses. CurrentUse is the current preference
680/// while *ForCandidate is attributes of the candidate under consideration.
681PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
682 PreferredTuple &CurrentUse,
683 const LLT TyForCandidate,
684 unsigned OpcodeForCandidate,
685 MachineInstr *MIForCandidate) {
686 if (!CurrentUse.Ty.isValid()) {
687 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
688 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
689 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
690 return CurrentUse;
691 }
692
693 // We permit the extend to hoist through basic blocks but this is only
694 // sensible if the target has extending loads. If you end up lowering back
695 // into a load and extend during the legalizer then the end result is
696 // hoisting the extend up to the load.
697
698 // Prefer defined extensions to undefined extensions as these are more
699 // likely to reduce the number of instructions.
700 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
701 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
702 return CurrentUse;
703 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
704 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
705 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
706
707 // Prefer sign extensions to zero extensions as sign-extensions tend to be
708 // more expensive. Don't do this if the load is already a zero-extend load
709 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
710 // later.
711 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
712 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
713 OpcodeForCandidate == TargetOpcode::G_ZEXT)
714 return CurrentUse;
715 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
716 OpcodeForCandidate == TargetOpcode::G_SEXT)
717 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
718 }
719
720 // This is potentially target specific. We've chosen the largest type
721 // because G_TRUNC is usually free. One potential catch with this is that
722 // some targets have a reduced number of larger registers than smaller
723 // registers and this choice potentially increases the live-range for the
724 // larger value.
725 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
726 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
727 }
728 return CurrentUse;
729}
730
731/// Find a suitable place to insert some instructions and insert them. This
732/// function accounts for special cases like inserting before a PHI node.
733/// The current strategy for inserting before PHI's is to duplicate the
734/// instructions for each predecessor. However, while that's ok for G_TRUNC
735/// on most targets since it generally requires no code, other targets/cases may
736/// want to try harder to find a dominating block.
737static void InsertInsnsWithoutSideEffectsBeforeUse(
740 MachineOperand &UseMO)>
741 Inserter) {
742 MachineInstr &UseMI = *UseMO.getParent();
743
744 MachineBasicBlock *InsertBB = UseMI.getParent();
745
746 // If the use is a PHI then we want the predecessor block instead.
747 if (UseMI.isPHI()) {
748 MachineOperand *PredBB = std::next(&UseMO);
749 InsertBB = PredBB->getMBB();
750 }
751
752 // If the block is the same block as the def then we want to insert just after
753 // the def instead of at the start of the block.
754 if (InsertBB == DefMI.getParent()) {
756 Inserter(InsertBB, std::next(InsertPt), UseMO);
757 return;
758 }
759
760 // Otherwise we want the start of the BB
761 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
762}
763} // end anonymous namespace
764
766 PreferredTuple Preferred;
767 if (matchCombineExtendingLoads(MI, Preferred)) {
768 applyCombineExtendingLoads(MI, Preferred);
769 return true;
770 }
771 return false;
772}
773
774static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
775 unsigned CandidateLoadOpc;
776 switch (ExtOpc) {
777 case TargetOpcode::G_ANYEXT:
778 CandidateLoadOpc = TargetOpcode::G_LOAD;
779 break;
780 case TargetOpcode::G_SEXT:
781 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
782 break;
783 case TargetOpcode::G_ZEXT:
784 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
785 break;
786 default:
787 llvm_unreachable("Unexpected extend opc");
788 }
789 return CandidateLoadOpc;
790}
791
793 MachineInstr &MI, PreferredTuple &Preferred) const {
794 // We match the loads and follow the uses to the extend instead of matching
795 // the extends and following the def to the load. This is because the load
796 // must remain in the same position for correctness (unless we also add code
797 // to find a safe place to sink it) whereas the extend is freely movable.
798 // It also prevents us from duplicating the load for the volatile case or just
799 // for performance.
800 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
801 if (!LoadMI)
802 return false;
803
804 Register LoadReg = LoadMI->getDstReg();
805
806 LLT LoadValueTy = MRI.getType(LoadReg);
807 if (!LoadValueTy.isScalar())
808 return false;
809
810 // Most architectures are going to legalize <s8 loads into at least a 1 byte
811 // load, and the MMOs can only describe memory accesses in multiples of bytes.
812 // If we try to perform extload combining on those, we can end up with
813 // %a(s8) = extload %ptr (load 1 byte from %ptr)
814 // ... which is an illegal extload instruction.
815 if (LoadValueTy.getSizeInBits() < 8)
816 return false;
817
818 // For non power-of-2 types, they will very likely be legalized into multiple
819 // loads. Don't bother trying to match them into extending loads.
821 return false;
822
823 // Find the preferred type aside from the any-extends (unless it's the only
824 // one) and non-extending ops. We'll emit an extending load to that type
825 // and emit a variant of (extend (trunc X)) for the others according to the
826 // relative type sizes. At the same time, pick an extend to use based on the
827 // extend involved in the chosen type.
828 unsigned PreferredOpcode =
829 isa<GLoad>(&MI)
830 ? TargetOpcode::G_ANYEXT
831 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
832 Preferred = {LLT(), PreferredOpcode, nullptr};
833 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
834 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
835 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
836 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
837 const auto &MMO = LoadMI->getMMO();
838 // Don't do anything for atomics.
839 if (MMO.isAtomic())
840 continue;
841 // Check for legality.
842 if (!isPreLegalize()) {
843 LegalityQuery::MemDesc MMDesc(MMO);
844 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
845 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
846 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
847 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
848 .Action != LegalizeActions::Legal)
849 continue;
850 }
851 Preferred = ChoosePreferredUse(MI, Preferred,
852 MRI.getType(UseMI.getOperand(0).getReg()),
853 UseMI.getOpcode(), &UseMI);
854 }
855 }
856
857 // There were no extends
858 if (!Preferred.MI)
859 return false;
860 // It should be impossible to choose an extend without selecting a different
861 // type since by definition the result of an extend is larger.
862 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
863
864 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
865 return true;
866}
867
869 MachineInstr &MI, PreferredTuple &Preferred) const {
870 // Rewrite the load to the chosen extending load.
871 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
872
873 // Inserter to insert a truncate back to the original type at a given point
874 // with some basic CSE to limit truncate duplication to one per BB.
876 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
877 MachineBasicBlock::iterator InsertBefore,
878 MachineOperand &UseMO) {
879 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
880 if (PreviouslyEmitted) {
881 Observer.changingInstr(*UseMO.getParent());
882 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
883 Observer.changedInstr(*UseMO.getParent());
884 return;
885 }
886
887 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
888 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
889 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
890 EmittedInsns[InsertIntoBB] = NewMI;
891 replaceRegOpWith(MRI, UseMO, NewDstReg);
892 };
893
894 Observer.changingInstr(MI);
895 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
896 MI.setDesc(Builder.getTII().get(LoadOpc));
897
898 // Rewrite all the uses to fix up the types.
899 auto &LoadValue = MI.getOperand(0);
901 llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));
902
903 for (auto *UseMO : Uses) {
904 MachineInstr *UseMI = UseMO->getParent();
905
906 // If the extend is compatible with the preferred extend then we should fix
907 // up the type and extend so that it uses the preferred use.
908 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
909 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
910 Register UseDstReg = UseMI->getOperand(0).getReg();
911 MachineOperand &UseSrcMO = UseMI->getOperand(1);
912 const LLT UseDstTy = MRI.getType(UseDstReg);
913 if (UseDstReg != ChosenDstReg) {
914 if (Preferred.Ty == UseDstTy) {
915 // If the use has the same type as the preferred use, then merge
916 // the vregs and erase the extend. For example:
917 // %1:_(s8) = G_LOAD ...
918 // %2:_(s32) = G_SEXT %1(s8)
919 // %3:_(s32) = G_ANYEXT %1(s8)
920 // ... = ... %3(s32)
921 // rewrites to:
922 // %2:_(s32) = G_SEXTLOAD ...
923 // ... = ... %2(s32)
924 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
925 Observer.erasingInstr(*UseMO->getParent());
926 UseMO->getParent()->eraseFromParent();
927 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
928 // If the preferred size is smaller, then keep the extend but extend
929 // from the result of the extending load. For example:
930 // %1:_(s8) = G_LOAD ...
931 // %2:_(s32) = G_SEXT %1(s8)
932 // %3:_(s64) = G_ANYEXT %1(s8)
933 // ... = ... %3(s64)
934 // rewrites to:
935 // %2:_(s32) = G_SEXTLOAD ...
936 // %3:_(s64) = G_ANYEXT %2:_(s32)
937 // ... = ... %3(s64)
938 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
939 } else {
940 // If the preferred size is larger, then insert a truncate. For
941 // example:
942 // %1:_(s8) = G_LOAD ...
943 // %2:_(s64) = G_SEXT %1(s8)
944 // %3:_(s32) = G_ZEXT %1(s8)
945 // ... = ... %3(s32)
946 // rewrites to:
947 // %2:_(s64) = G_SEXTLOAD ...
948 // %4:_(s8) = G_TRUNC %2:_(s64)
949 // %3:_(s32) = G_ZEXT %4:_(s8)
950 // ... = ... %3(s32)
951 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
952 InsertTruncAt);
953 }
954 continue;
955 }
956 // The use is (one of) the uses of the preferred use we chose earlier.
957 // We're going to update the load to def this value later so just erase
958 // the old extend.
959 Observer.erasingInstr(*UseMO->getParent());
960 UseMO->getParent()->eraseFromParent();
961 continue;
962 }
963
964 // The use isn't an extend. Truncate back to the type we originally loaded.
965 // This is free on many targets.
966 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
967 }
968
969 MI.getOperand(0).setReg(ChosenDstReg);
970 Observer.changedInstr(MI);
971}
972
974 BuildFnTy &MatchInfo) const {
 // Matches a scalar G_AND whose second operand is a low-bit mask constant and
 // whose first operand is defined directly by a load. On success MatchInfo is
 // set to a callback that emits a G_ZEXTLOAD in place of that load.
975 assert(MI.getOpcode() == TargetOpcode::G_AND);
976
977 // If we have the following code:
978 // %mask = G_CONSTANT 255
979 // %ld = G_LOAD %ptr, (load s16)
980 // %and = G_AND %ld, %mask
981 //
982 // Try to fold it into
983 // %ld = G_ZEXTLOAD %ptr, (load s8)
984
985 Register Dst = MI.getOperand(0).getReg();
986 if (MRI.getType(Dst).isVector())
987 return false;
988
989 auto MaybeMask =
990 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
991 if (!MaybeMask)
992 return false;
993
994 APInt MaskVal = MaybeMask->Value;
995
 // Only contiguous low-bit masks are handled.
996 if (!MaskVal.isMask())
997 return false;
998
999 Register SrcReg = MI.getOperand(1).getReg();
1000 // Don't use getOpcodeDef() here since intermediate instructions may have
1001 // multiple users.
1002 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
1003 if (!LoadMI)
1004 return false;
1005
1006 Register LoadReg = LoadMI->getDstReg();
1007 LLT RegTy = MRI.getType(LoadReg);
1008 Register PtrReg = LoadMI->getPointerReg();
1009 unsigned RegSize = RegTy.getSizeInBits();
1010 unsigned LoadSizeBits = LoadMI->getMemSizeInBits().getValue();
1011 unsigned MaskSizeBits = MaskVal.countr_one();
1012
 // The original load is erased and its result register replaced when the fold
 // is applied. If the fold changes the value that register holds (the load
 // was sign-extending, or the access gets narrower), the load must not have
 // any other user.
1013 if ((isa<GSExtLoad>(LoadMI) || MaskSizeBits < LoadSizeBits) &&
1014 !MRI.hasOneNonDBGUse(LoadReg))
1015 return false;
1016
1017 // The mask may not be larger than the in-memory type, as it might cover sign
1018 // extended bits
1019 if (MaskSizeBits > LoadSizeBits)
1020 return false;
1021
1022 // If the mask covers the whole destination register, there's nothing to
1023 // extend
1024 if (MaskSizeBits >= RegSize)
1025 return false;
1026
1027 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
1028 // at least byte loads. Avoid creating such loads here
1029 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
1030 return false;
1031
1032 const MachineMemOperand &MMO = LoadMI->getMMO();
1033 LegalityQuery::MemDesc MemDesc(MMO);
1034
1035 // Don't modify the memory access size if this is atomic/volatile, but we can
1036 // still adjust the opcode to indicate the high bit behavior.
1037 if (LoadMI->isSimple())
1038 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
1039 else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
1040 return false;
1041
1042 // TODO: Could check if it's legal with the reduced or original memory size.
1044 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
1045 return false;
1046
 // Everything is captured by value; the callback builds the G_ZEXTLOAD at the
 // old load's position, reusing its pointer info with the (possibly narrowed)
 // memory type, then redirects users of the old result and erases the load.
1047 MatchInfo = [=](MachineIRBuilder &B) {
1048 B.setInstrAndDebugLoc(*LoadMI);
1049 auto &MF = B.getMF();
1050 auto PtrInfo = MMO.getPointerInfo();
1051 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
1052 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
1053 replaceRegWith(MRI, LoadReg, Dst);
1054 LoadMI->eraseFromParent();
1055 };
1056 return true;
1057}
1058
1060 const MachineInstr &UseMI) const {
 // Returns true if DefMI is UseMI itself or appears before UseMI in their
 // common basic block. Both instructions must be non-debug and in the same
 // block (asserted below).
1061 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1062 "shouldn't consider debug uses");
1063 assert(DefMI.getParent() == UseMI.getParent());
1064 if (&DefMI == &UseMI)
1065 return true;
1066 const MachineBasicBlock &MBB = *DefMI.getParent();
 // Linear scan from the top of the block: whichever of the two instructions
 // is encountered first decides the answer.
1067 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
1068 return &MI == &DefMI || &MI == &UseMI;
1069 });
1070 if (DefOrUse == MBB.end())
1071 llvm_unreachable("Block must contain both DefMI and UseMI!");
1072 return &*DefOrUse == &DefMI;
1073}
1074
1076 const MachineInstr &UseMI) const {
 // Returns whether DefMI dominates UseMI. Neither may be a debug instruction.
1077 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1078 "shouldn't consider debug uses");
 // Prefer the dominator tree when one is available. Without it, instructions
 // in different blocks are conservatively reported as not dominating.
1079 if (MDT)
1080 return MDT->dominates(&DefMI, &UseMI);
1081 else if (DefMI.getParent() != UseMI.getParent())
1082 return false;
1083
 // Same block and no dominator tree: fall back to instruction order.
1084 return isPredecessor(DefMI, UseMI);
1085}
1086
 // Matches a scalar G_SEXT_INREG whose source is (optionally a G_TRUNC of) a
 // G_SEXTLOAD that already sign-extends from the same bit width, which makes
 // the G_SEXT_INREG redundant.
1088 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1089 Register SrcReg = MI.getOperand(1).getReg();
1090 Register LoadUser = SrcReg;
1091
1092 if (MRI.getType(SrcReg).isVector())
1093 return false;
1094
 // Look through a single G_TRUNC between the load and the G_SEXT_INREG.
1095 Register TruncSrc;
1096 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1097 LoadUser = TruncSrc;
1098
1099 uint64_t SizeInBits = MI.getOperand(2).getImm();
1100 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1101 // need any extend at all, just a truncate.
1102 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1103 // If truncating more than the original extended value, abort.
1104 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1105 if (TruncSrc &&
1106 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1107 return false;
1108 if (LoadSizeBits == SizeInBits)
1109 return true;
1110 }
1111 return false;
1112}
1113
 // Replaces the G_SEXT_INREG with a plain copy of its source operand and
 // erases the original instruction.
1115 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1116 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1117 MI.eraseFromParent();
1118}
1119
1121 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
 // Matches a scalar G_SEXT_INREG whose source is defined directly by a G_LOAD
 // and checks that a G_SEXTLOAD could be used instead. On success MatchInfo
 // holds the load's destination register and the memory width, in bits, to
 // use for the new load.
1122 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1123
1124 Register DstReg = MI.getOperand(0).getReg();
1125 LLT RegTy = MRI.getType(DstReg);
1126
1127 // Only supports scalars for now.
1128 if (RegTy.isVector())
1129 return false;
1130
1131 Register SrcReg = MI.getOperand(1).getReg();
1132 auto *LoadDef = dyn_cast<GLoad>(MRI.getVRegDef(SrcReg));
1133 if (!LoadDef)
1134 return false;
1135
1136 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1137 uint64_t ExtFrom = MI.getOperand(2).getImm();
1138
 // Narrowing the access would change the loaded value, so it is only done
 // when this G_SEXT_INREG is the load's sole (non-debug) user.
1139 if (MemBits > ExtFrom && !MRI.hasOneNonDBGUse(SrcReg))
1140 return false;
1141
1142 // If the sign extend extends from a narrower width than the load's width,
1143 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1144 // Avoid widening the load at all.
1145 unsigned NewSizeBits = std::min(ExtFrom, MemBits);
1146
1147 // Don't generate G_SEXTLOADs with a < 1 byte width.
1148 if (NewSizeBits < 8)
1149 return false;
1150 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1151 // anyway for most targets.
1152 if (!isPowerOf2_32(NewSizeBits))
1153 return false;
1154
1155 const MachineMemOperand &MMO = LoadDef->getMMO();
1156 LegalityQuery::MemDesc MMDesc(MMO);
1157
1158 // Don't modify the memory access size if this is atomic/volatile, but we can
1159 // still adjust the opcode to indicate the high bit behavior.
1160 if (LoadDef->isSimple())
1161 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1162 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1163 return false;
1164
1165 // TODO: Could check if it's legal with the reduced or original memory size.
1166 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1167 {MRI.getType(LoadDef->getDstReg()),
1168 MRI.getType(LoadDef->getPointerReg())},
1169 {MMDesc}}))
1170 return false;
1171
1172 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1173 return true;
1174}
1175
1177 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
 // Rewrites the matched G_LOAD + G_SEXT_INREG pair into a single G_SEXTLOAD.
 // MatchInfo carries the old load's destination register and the new memory
 // width in bits.
1178 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1179 Register LoadReg;
1180 unsigned ScalarSizeBits;
1181 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1182 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1183
1184 // If we have the following:
1185 // %ld = G_LOAD %ptr, (load 2)
1186 // %ext = G_SEXT_INREG %ld, 8
1187 // ==>
1188 // %ld = G_SEXTLOAD %ptr (load 1)
1189
1190 auto &MMO = LoadDef->getMMO();
 // Insert the new load where the old one was.
1191 Builder.setInstrAndDebugLoc(*LoadDef);
1192 auto &MF = Builder.getMF();
1193 auto PtrInfo = MMO.getPointerInfo();
 // ScalarSizeBits is a bit count; it is divided by 8 here, presumably because
 // this getMachineMemOperand overload takes a size in bytes (confirm).
1194 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1195 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1196 LoadDef->getPointerReg(), *NewMMO);
1197 replaceRegWith(MRI, LoadReg, MI.getOperand(0).getReg());
1198 MI.eraseFromParent();
1199
1200 // Not all loads can be deleted, so make sure the old one is removed.
1201 LoadDef->eraseFromParent();
1202}
1203
1204/// Return true if 'MI' is a load or a store whose address operand may be
1205/// folded into the load / store addressing mode.
1207 MachineRegisterInfo &MRI) {
1209 auto *MF = MI->getMF();
 // Only addresses computed by a G_PTR_ADD are candidates for folding.
1210 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1211 if (!Addr)
1212 return false;
1213
 // Describe the address as base + constant offset or base + register, then
 // ask the target whether that mode is legal for the accessed memory type.
1214 AM.HasBaseReg = true;
1215 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1216 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1217 else
1218 AM.Scale = 1; // [reg +/- reg]
1219
1220 return TLI.isLegalAddressingMode(
1221 MF->getDataLayout(), AM,
1222 getTypeForLLT(MI->getMMO().getMemoryType(),
1223 MF->getFunction().getContext()),
1224 MI->getMMO().getAddrSpace());
1225}
1226
1227static unsigned getIndexedOpc(unsigned LdStOpc) {
1228 switch (LdStOpc) {
1229 case TargetOpcode::G_LOAD:
1230 return TargetOpcode::G_INDEXED_LOAD;
1231 case TargetOpcode::G_STORE:
1232 return TargetOpcode::G_INDEXED_STORE;
1233 case TargetOpcode::G_ZEXTLOAD:
1234 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1235 case TargetOpcode::G_SEXTLOAD:
1236 return TargetOpcode::G_INDEXED_SEXTLOAD;
1237 default:
1238 llvm_unreachable("Unexpected opcode");
1239 }
1240}
1241
/// Returns whether the indexed form of \p LdSt's opcode is reported legal by
/// isLegal() for the instruction's value type, pointer type and memory type.
1242bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1243 // Check for legality.
1244 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1245 LLT Ty = MRI.getType(LdSt.getReg(0));
1246 LLT MemTy = LdSt.getMMO().getMemoryType();
1248 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1250 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
 // The type list passed to the query is laid out differently for indexed
 // stores than for indexed loads.
1251 SmallVector<LLT> OpTys;
1252 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1253 OpTys = {PtrTy, Ty, Ty};
1254 else
1255 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1256
1257 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1258 return isLegal(Q);
1259}
1260
1262 "post-index-use-threshold", cl::Hidden, cl::init(32),
1263 cl::desc("Number of uses of a base pointer to check before it is no longer "
1264 "considered for post-indexing."));
1265
/// Searches the users of \p LdSt's pointer for a G_PTR_ADD that could be
/// folded into \p LdSt as a post-indexed access. On success \p Addr is the
/// G_PTR_ADD result, \p Base its base register, \p Offset its offset register,
/// and \p RematOffset says whether the offset G_CONSTANT has to be
/// rematerialized because it does not dominate \p LdSt. \p Offset and
/// \p RematOffset may be written even when false is returned.
1266bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1268 bool &RematOffset) const {
1269 // We're looking for the following pattern, for either load or store:
1270 // %baseptr:_(p0) = ...
1271 // G_STORE %val(s64), %baseptr(p0)
1272 // %offset:_(s64) = G_CONSTANT i64 -256
1273 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1274 const auto &TLI = getTargetLowering();
1275
1276 Register Ptr = LdSt.getPointerReg();
1277 // If the store is the only use, don't bother.
1278 if (MRI.hasOneNonDBGUse(Ptr))
1279 return false;
1280
1281 if (!isIndexedLoadStoreLegal(LdSt))
1282 return false;
1283
 // Pointers that come straight from a frame index are not considered.
1284 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1285 return false;
1286
1287 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1288 auto *PtrDef = MRI.getVRegDef(Ptr);
1289
1290 unsigned NumUsesChecked = 0;
1291 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1292 if (++NumUsesChecked > PostIndexUseThreshold)
1293 return false; // Try to avoid exploding compile time.
1294
1295 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1296 // The use itself might be dead. This can happen during combines if DCE
1297 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1298 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1299 continue;
1300
1301 // Check the user of this isn't the store, otherwise we'd generate an
1302 // indexed store defining its own use.
1303 if (StoredValDef == &Use)
1304 continue;
1305
1306 Offset = PtrAdd->getOffsetReg();
1307 if (!ForceLegalIndexing &&
1308 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1309 /*IsPre*/ false, MRI))
1310 continue;
1311
1312 // Make sure the offset calculation is before the potentially indexed op.
1313 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1314 RematOffset = false;
1315 if (!dominates(*OffsetDef, LdSt)) {
1316 // If the offset however is just a G_CONSTANT, we can always just
1317 // rematerialize it where we need it.
1318 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1319 continue;
1320 RematOffset = true;
1321 }
1322
 // Inspect every other user of the base pointer. Any of the checks below
 // failing rejects the whole search, not just this G_PTR_ADD.
1323 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1324 if (&BasePtrUse == PtrDef)
1325 continue;
1326
1327 // If the user is a later load/store that can be post-indexed, then don't
1328 // combine this one.
1329 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1330 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1331 dominates(LdSt, *BasePtrLdSt) &&
1332 isIndexedLoadStoreLegal(*BasePtrLdSt))
1333 return false;
1334
1335 // Now we're looking for the key G_PTR_ADD instruction, which contains
1336 // the offset add that we want to fold.
1337 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1338 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1339 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1340 // If the use is in a different block, then we may produce worse code
1341 // due to the extra register pressure.
1342 if (BaseUseUse.getParent() != LdSt.getParent())
1343 return false;
1344
 // A user that can already fold the G_PTR_ADD into its own addressing
 // mode makes the indexed form unprofitable.
1345 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1346 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1347 return false;
1348 }
1349 if (!dominates(LdSt, BasePtrUse))
1350 return false; // All uses must be dominated by the load/store.
1351 }
1352 }
1353
1354 Addr = PtrAdd->getReg(0);
1355 Base = PtrAdd->getBaseReg();
1356 return true;
1357 }
1358
1359 return false;
1360}
1361
1362bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1363 Register &Base,
1364 Register &Offset) const {
1365 auto &MF = *LdSt.getParent()->getParent();
1366 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1367
1368 Addr = LdSt.getPointerReg();
1369 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1370 MRI.hasOneNonDBGUse(Addr))
1371 return false;
1372
1373 if (!ForceLegalIndexing &&
1374 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1375 return false;
1376
1377 if (!isIndexedLoadStoreLegal(LdSt))
1378 return false;
1379
1380 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1381 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1382 return false;
1383
1384 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1385 // Would require a copy.
1386 if (Base == St->getValueReg())
1387 return false;
1388
1389 // We're expecting one use of Addr in MI, but it could also be the
1390 // value stored, which isn't actually dominated by the instruction.
1391 if (St->getValueReg() == Addr)
1392 return false;
1393 }
1394
1395 // Avoid increasing cross-block register pressure.
1396 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1397 if (AddrUse.getParent() != LdSt.getParent())
1398 return false;
1399
1400 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1401 // That might allow us to end base's liveness here by adjusting the constant.
1402 bool RealUse = false;
1403 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1404 if (!dominates(LdSt, AddrUse))
1405 return false; // All use must be dominated by the load/store.
1406
1407 // If Ptr may be folded in addressing mode of other use, then it's
1408 // not profitable to do this transformation.
1409 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1410 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1411 RealUse = true;
1412 } else {
1413 RealUse = true;
1414 }
1415 }
1416 return RealUse;
1417}
1418
1420 MachineInstr &MI, BuildFnTy &MatchInfo) const {
 // Matches a G_EXTRACT_VECTOR_ELT whose vector operand comes from a simple
 // G_LOAD with no other user, and checks that loading just the extracted
 // element is legal and fast. On success MatchInfo is set to a callback that
 // builds the narrow scalar load and erases the vector load.
1421 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1422
1423 // Check if there is a load that defines the vector being extracted from.
1424 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1425 if (!LoadMI)
1426 return false;
1427
1428 Register Vector = MI.getOperand(1).getReg();
1429 LLT VecEltTy = MRI.getType(Vector).getElementType();
1430
1431 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1432
1433 // Checking whether we should reduce the load width.
1434 if (!MRI.hasOneNonDBGUse(Vector))
1435 return false;
1436
1437 // Check if the defining load is simple.
1438 if (!LoadMI->isSimple())
1439 return false;
1440
1441 // If the vector element type is not a multiple of a byte then we are unable
1442 // to correctly compute an address to load only the extracted element as a
1443 // scalar.
1444 if (!VecEltTy.isByteSized())
1445 return false;
1446
1447 // Check for load fold barriers between the extraction and the load.
1448 if (MI.getParent() != LoadMI->getParent())
1449 return false;
 // The scan is capped at MaxIter instructions; beyond that the match gives up.
1450 const unsigned MaxIter = 20;
1451 unsigned Iter = 0;
1452 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1453 if (II->isLoadFoldBarrier())
1454 return false;
1455 if (Iter++ == MaxIter)
1456 return false;
1457 }
1458
1459 // Check if the new load that we are going to create is legal
1460 // if we are in the post-legalization phase.
1461 MachineMemOperand MMO = LoadMI->getMMO();
1462 Align Alignment = MMO.getAlign();
1463 MachinePointerInfo PtrInfo;
1465
1466 // Finding the appropriate PtrInfo if offset is a known constant.
1467 // This is required to create the memory operand for the narrowed load.
1468 // This machine memory operand object helps us infer about legality
1469 // before we proceed to combine the instruction.
 // NOTE(review): the constant lookup below is performed on the vector
 // register, not on the index operand (operand 2). Since Vector is defined by
 // a load, this branch looks unreachable as written; confirm whether the
 // index was intended.
1470 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1471 int Elt = CVal->getZExtValue();
1472 // FIXME: should be (ABI size)*Elt.
1473 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1474 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1475 } else {
1476 // Discard the pointer info except the address space because the memory
1477 // operand can't represent this new access since the offset is variable.
1478 Offset = VecEltTy.getSizeInBits() / 8;
1480 }
1481
1482 Alignment = commonAlignment(Alignment, Offset);
1483
1484 Register VecPtr = LoadMI->getPointerReg();
1485 LLT PtrTy = MRI.getType(VecPtr);
1486
1487 MachineFunction &MF = *MI.getMF();
1488 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1489
1490 LegalityQuery::MemDesc MMDesc(*NewMMO);
1491
1493 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1494 return false;
1495
1496 // Load must be allowed and fast on the target.
1498 auto &DL = MF.getDataLayout();
1499 unsigned Fast = 0;
1500 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1501 &Fast) ||
1502 !Fast)
1503 return false;
1504
1505 Register Result = MI.getOperand(0).getReg();
1506 Register Index = MI.getOperand(2).getReg();
1507
1508 MatchInfo = [=](MachineIRBuilder &B) {
1509 GISelObserverWrapper DummyObserver;
1510 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1511 // Get pointer to the vector element.
1512 Register finalPtr = Helper.getVectorElementPointer(
1513 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1514 Index);
1515 // New G_LOAD instruction.
1516 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1517 // Remove original GLOAD instruction.
1518 LoadMI->eraseFromParent();
1519 };
1520
1521 return true;
1522}
1523
1525 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
 // Tries to find a pre-index candidate first and falls back to a post-index
 // candidate. MatchInfo receives the address, base and offset registers and
 // which of the two forms was found.
1526 auto &LdSt = cast<GLoadStore>(MI);
1527
 // Atomic accesses are never turned into indexed operations.
1528 if (LdSt.isAtomic())
1529 return false;
1530
1531 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1532 MatchInfo.Offset);
1533 if (!MatchInfo.IsPre &&
1534 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1535 MatchInfo.Offset, MatchInfo.RematOffset))
1536 return false;
1537
1538 return true;
1539}
1540
1542 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
 // Builds the indexed load/store described by MatchInfo, then erases both the
 // original memory instruction and the instruction that defined the address.
1543 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1544 unsigned Opcode = MI.getOpcode();
1545 bool IsStore = Opcode == TargetOpcode::G_STORE;
1546 unsigned NewOpcode = getIndexedOpc(Opcode);
1547
1548 // If the offset constant didn't happen to dominate the load/store, we can
1549 // just clone it as needed.
1550 if (MatchInfo.RematOffset) {
1551 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1552 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1553 *OldCst->getOperand(1).getCImm());
1554 MatchInfo.Offset = NewCst.getReg(0);
1555 }
1556
 // Operand order: an indexed store defines only the written-back address and
 // uses the stored value; an indexed load defines the loaded value and then
 // the written-back address.
1557 auto MIB = Builder.buildInstr(NewOpcode);
1558 if (IsStore) {
1559 MIB.addDef(MatchInfo.Addr);
1560 MIB.addUse(MI.getOperand(0).getReg());
1561 } else {
1562 MIB.addDef(MI.getOperand(0).getReg());
1563 MIB.addDef(MatchInfo.Addr);
1564 }
1565
1566 MIB.addUse(MatchInfo.Base);
1567 MIB.addUse(MatchInfo.Offset);
1568 MIB.addImm(MatchInfo.IsPre);
1569 MIB->cloneMemRefs(*MI.getMF(), MI);
1570 MI.eraseFromParent();
1571 AddrDef.eraseFromParent();
1572
 // NOTE(review): the debug message below misspells "Combined" and has no
 // trailing newline.
1573 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
1574}
1575
1577 MachineInstr *&OtherMI) const {
 // Given a G_[SU]DIV or G_[SU]REM, looks in the same block for the
 // complementary operation of the same signedness on equal operands. On
 // success OtherMI points at that instruction.
1578 unsigned Opcode = MI.getOpcode();
1579 bool IsDiv, IsSigned;
1580
1581 switch (Opcode) {
1582 default:
1583 llvm_unreachable("Unexpected opcode!");
1584 case TargetOpcode::G_SDIV:
1585 case TargetOpcode::G_UDIV: {
1586 IsDiv = true;
1587 IsSigned = Opcode == TargetOpcode::G_SDIV;
1588 break;
1589 }
1590 case TargetOpcode::G_SREM:
1591 case TargetOpcode::G_UREM: {
1592 IsDiv = false;
1593 IsSigned = Opcode == TargetOpcode::G_SREM;
1594 break;
1595 }
1596 }
1597
1598 Register Src1 = MI.getOperand(1).getReg();
1599 unsigned DivOpcode, RemOpcode, DivremOpcode;
1600 if (IsSigned) {
1601 DivOpcode = TargetOpcode::G_SDIV;
1602 RemOpcode = TargetOpcode::G_SREM;
1603 DivremOpcode = TargetOpcode::G_SDIVREM;
1604 } else {
1605 DivOpcode = TargetOpcode::G_UDIV;
1606 RemOpcode = TargetOpcode::G_UREM;
1607 DivremOpcode = TargetOpcode::G_UDIVREM;
1608 }
1609
1610 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1611 return false;
1612
1613 // Combine:
1614 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1615 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1616 // into:
1617 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1618
1619 // Combine:
1620 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1621 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1622 // into:
1623 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1624
 // Candidates are found by walking the users of the first source operand.
1625 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1626 if (MI.getParent() == UseMI.getParent() &&
1627 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1628 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1629 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1630 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1631 OtherMI = &UseMI;
1632 return true;
1633 }
1634 }
1635
1636 return false;
1637}
1638
1640 MachineInstr *&OtherMI) const {
 // Replaces MI and OtherMI (a matching div/rem pair) with one G_[SU]DIVREM
 // that defines both original result registers.
1641 unsigned Opcode = MI.getOpcode();
1642 assert(OtherMI && "OtherMI shouldn't be empty.");
1643
 // Work out which of the two instructions supplies the quotient register and
 // which supplies the remainder register.
1644 Register DestDivReg, DestRemReg;
1645 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1646 DestDivReg = MI.getOperand(0).getReg();
1647 DestRemReg = OtherMI->getOperand(0).getReg();
1648 } else {
1649 DestDivReg = OtherMI->getOperand(0).getReg();
1650 DestRemReg = MI.getOperand(0).getReg();
1651 }
1652
1653 bool IsSigned =
1654 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1655
1656 // Check which instruction is first in the block so we don't break def-use
1657 // deps by "moving" the instruction incorrectly. Also keep track of which
1658 // instruction is first so we pick its operands, avoiding use-before-def
1659 // bugs.
1660 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1661 Builder.setInstrAndDebugLoc(*FirstInst);
1662
1663 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1664 : TargetOpcode::G_UDIVREM,
1665 {DestDivReg, DestRemReg},
1666 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1667 MI.eraseFromParent();
1668 OtherMI->eraseFromParent();
1669}
1670
1672 MachineInstr &MI, MachineInstr *&BrCond) const {
 // Matches a G_BR that is directly preceded by a G_BRCOND whose target is the
 // layout successor of this block. On success BrCond points at the G_BRCOND.
 // BrCond may be written even when false is returned.
1673 assert(MI.getOpcode() == TargetOpcode::G_BR);
1674
1675 // Try to match the following:
1676 // bb1:
1677 // G_BRCOND %c1, %bb2
1678 // G_BR %bb3
1679 // bb2:
1680 // ...
1681 // bb3:
1682
1683 // The above pattern does not have a fall through to the successor bb2, always
1684 // resulting in a branch no matter which path is taken. Here we try to find
1685 // and replace that pattern with conditional branch to bb3 and otherwise
1686 // fallthrough to bb2. This is generally better for branch predictors.
1687
1688 MachineBasicBlock *MBB = MI.getParent();
 // A G_BR at the very start of the block has no preceding instruction.
1690 if (BrIt == MBB->begin())
1691 return false;
1692 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1693
1694 BrCond = &*std::prev(BrIt);
1695 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1696 return false;
1697
1698 // Check that the next block is the conditional branch target. Also make sure
1699 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1700 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1701 return BrCondTarget != MI.getOperand(0).getMBB() &&
1702 MBB->isLayoutSuccessor(BrCondTarget);
1703}
1704
1706 MachineInstr &MI, MachineInstr *&BrCond) const {
 // Inverts the G_BRCOND condition by XOR-ing it with the target's "true"
 // value and swaps the destinations of the G_BRCOND and the G_BR.
1707 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
 // The constant and the XOR are inserted right before the G_BRCOND.
1708 Builder.setInstrAndDebugLoc(*BrCond);
1709 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1710 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1711 // this to i1 only since we might not know for sure what kind of
1712 // compare generated the condition value.
1713 auto True = Builder.buildConstant(
1714 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1715 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1716
 // The unconditional branch now goes to the old conditional target.
1717 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1718 Observer.changingInstr(MI);
1719 MI.getOperand(0).setMBB(FallthroughBB);
1720 Observer.changedInstr(MI);
1721
1722 // Change the conditional branch to use the inverted condition and
1723 // new target block.
1724 Observer.changingInstr(*BrCond);
1725 BrCond->getOperand(0).setReg(Xor.getReg(0));
1726 BrCond->getOperand(1).setMBB(BrTarget);
1727 Observer.changedInstr(*BrCond);
1728}
1729
1732 unsigned MaxLen) const {
 // Binds the fields of MatchInfo by reference and forwards them to
 // canLowerMemCpyFamily, whose result is returned. Presumably the helper
 // fills the fields in for the later apply step (confirm against its
 // declaration).
1733 auto &[Dst, Src, KnownLen, Alignment, DstAlignCanChange, MemOps] = MatchInfo;
1734 return canLowerMemCpyFamily(MI, MRI, MaxLen, Dst, Src, KnownLen, Alignment,
1735 DstAlignCanChange, MemOps);
1736}
1737
1739 MachineInstr &MI, MemCpyFamilyLoweringInfo &MatchInfo) const {
 // Lowers MI through a LegalizerHelper using the parameters stored in
 // MatchInfo. The lowering is expected to succeed (asserted below).
1740 auto &[Dst, Src, KnownLen, Alignment, DstAlignCanChange, MemOps] = MatchInfo;
 // The helper gets its own builder positioned at MI and a locally constructed
 // observer wrapper rather than this object's Observer.
1741 MachineIRBuilder HelperBuilder(MI);
1742 GISelObserverWrapper DummyObserver;
1743 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1744 bool Changed = Helper.lowerMemCpyFamily(MI, Dst, Src, KnownLen, Alignment,
1745 DstAlignCanChange, MemOps) ==
1747 assert(Changed && "expected memcpy-family instruction to lower");
 // Silences the unused-variable warning when asserts are compiled out.
1748 (void)Changed;
1749}
1750
1752 unsigned MaxLen) const {
 // One-step variant: asks a LegalizerHelper to lower MI directly with the
 // given MaxLen and reports the outcome of that call.
1753 MachineIRBuilder HelperBuilder(MI);
1754 GISelObserverWrapper DummyObserver;
1755 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1756 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1758}
1759
1761 const MachineRegisterInfo &MRI,
1762 const APFloat &Val) {
 // Constant-folds the unary floating-point operation MI applied to Val and
 // returns the folded value. Unsupported opcodes reach llvm_unreachable.
1763 APFloat Result(Val);
1764 switch (MI.getOpcode()) {
1765 default:
1766 llvm_unreachable("Unexpected opcode!");
1767 case TargetOpcode::G_FNEG: {
1768 Result.changeSign();
1769 return Result;
1770 }
1771 case TargetOpcode::G_FABS: {
1772 Result.clearSign();
1773 return Result;
1774 }
1775 case TargetOpcode::G_FCEIL:
1776 Result.roundToIntegral(APFloat::rmTowardPositive);
1777 return Result;
1778 case TargetOpcode::G_FFLOOR:
1779 Result.roundToIntegral(APFloat::rmTowardNegative);
1780 return Result;
1781 case TargetOpcode::G_INTRINSIC_TRUNC:
1782 Result.roundToIntegral(APFloat::rmTowardZero);
1783 return Result;
1784 case TargetOpcode::G_INTRINSIC_ROUND:
1785 Result.roundToIntegral(APFloat::rmNearestTiesToAway);
1786 return Result;
1787 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
1788 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1789 return Result;
1790 case TargetOpcode::G_FRINT:
1791 case TargetOpcode::G_FNEARBYINT:
1792 // Use default rounding mode (round to nearest, ties to even)
1793 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1794 return Result;
1795 case TargetOpcode::G_FPEXT:
1796 case TargetOpcode::G_FPTRUNC: {
1797 bool Unused;
1798 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1800 &Unused);
1801 return Result;
1802 }
 // G_FSQRT and G_FLOG2 are evaluated with the host's double-precision
 // sqrt()/log2(), so the intermediate result is only as precise as a double.
1803 case TargetOpcode::G_FSQRT: {
1804 bool Unused;
1806 &Unused);
1807 Result = APFloat(sqrt(Result.convertToDouble()));
1808 break;
1809 }
1810 case TargetOpcode::G_FLOG2: {
1811 bool Unused;
1813 &Unused);
1814 Result = APFloat(log2(Result.convertToDouble()));
1815 break;
1816 }
1817 }
1818 // Convert the `APFloat` back to the semantics of the input value. Otherwise,
1819 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and
1820 // `G_FLOG2` reach here.
1821 bool Unused;
1822 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1823 return Result;
1824}
1825
1827 MachineInstr &MI, const ConstantFP *Cst) const {
 // Folds MI applied to the constant Cst, defines MI's result with a
 // G_FCONSTANT holding the folded value, and erases MI.
1828 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1829 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1830 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1831 MI.eraseFromParent();
1832}
1833
1835 PtrAddChain &MatchInfo) const {
1836 // We're trying to match the following pattern:
1837 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1838 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1839 // -->
1840 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1841
1842 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1843 return false;
1844
 // Note on naming: the locals are numbered from the root outwards, which is
 // the opposite of the pattern comment above. Imm1 is the root's offset
 // ("imm2" in the pattern) and Imm2 is the inner G_PTR_ADD's offset ("imm1").
1845 Register Add2 = MI.getOperand(1).getReg();
1846 Register Imm1 = MI.getOperand(2).getReg();
1847 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1848 if (!MaybeImmVal)
1849 return false;
1850
1851 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1852 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1853 return false;
1854
1855 Register Base = Add2Def->getOperand(1).getReg();
1856 Register Imm2 = Add2Def->getOperand(2).getReg();
1857 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1858 if (!MaybeImm2Val)
1859 return false;
1860
1861 // Check if the new combined immediate forms an illegal addressing mode.
1862 // Do not combine if it was legal before but would get illegal.
1863 // To do so, we need to find a load/store user of the pointer to get
1864 // the access type.
1865 Type *AccessTy = nullptr;
1866 auto &MF = *MI.getMF();
 // Only the first load/store user found is consulted for the access type.
1867 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1868 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1869 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1870 MF.getFunction().getContext());
1871 break;
1872 }
1873 }
1875 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1876 AMNew.BaseOffs = CombinedImm.getSExtValue();
1877 if (AccessTy) {
1878 AMNew.HasBaseReg = true;
 // The "old" addressing mode is the root G_PTR_ADD's own offset on top of a
 // base register.
1880 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1881 AMOld.HasBaseReg = true;
1882 unsigned AS = MRI.getType(Add2).getAddressSpace();
1883 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1884 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1885 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1886 return false;
1887 }
1888
1889 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1890 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1891 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1892 // largest signed integer that fits into the index type, which is the maximum
1893 // size of allocated objects according to the IR Language Reference.
1894 unsigned PtrAddFlags = MI.getFlags();
1895 unsigned LHSPtrAddFlags = Add2Def->getFlags();
 // A flag is only carried over when both G_PTR_ADDs have it.
1896 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1897 bool IsInBounds =
1898 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1899 unsigned Flags = 0;
1900 if (IsNoUWrap)
1902 if (IsInBounds) {
1905 }
1906
1907 // Pass the combined immediate to the apply function.
1908 MatchInfo.Imm = AMNew.BaseOffs;
1909 MatchInfo.Base = Base;
1910 MatchInfo.Bank = getRegBank(Imm2);
1911 MatchInfo.Flags = Flags;
1912 return true;
1913}
1914
1916 PtrAddChain &MatchInfo) const {
1917 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1918 MachineIRBuilder MIB(MI);
1919 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1920 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1921 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1922 Observer.changingInstr(MI);
1923 MI.getOperand(1).setReg(MatchInfo.Base);
1924 MI.getOperand(2).setReg(NewOffset.getReg(0));
1925 MI.setFlags(MatchInfo.Flags);
1926 Observer.changedInstr(MI);
1927}
1928
1930 RegisterImmPair &MatchInfo) const {
1931 // We're trying to match the following pattern with any of
1932 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1933 // %t1 = SHIFT %base, G_CONSTANT imm1
1934 // %root = SHIFT %t1, G_CONSTANT imm2
1935 // -->
1936 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1937
1938 unsigned Opcode = MI.getOpcode();
1939 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1940 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1941 Opcode == TargetOpcode::G_USHLSAT) &&
1942 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1943
1944 Register Shl2 = MI.getOperand(1).getReg();
1945 Register Imm1 = MI.getOperand(2).getReg();
1946 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1947 if (!MaybeImmVal)
1948 return false;
1949
1950 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1951 if (Shl2Def->getOpcode() != Opcode)
1952 return false;
1953
1954 Register Base = Shl2Def->getOperand(1).getReg();
1955 Register Imm2 = Shl2Def->getOperand(2).getReg();
1956 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1957 if (!MaybeImm2Val)
1958 return false;
1959
1960 // Pass the combined immediate to the apply function.
1961 MatchInfo.Imm =
1962 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1963 MatchInfo.Reg = Base;
1964
1965 // There is no simple replacement for a saturating unsigned left shift that
1966 // exceeds the scalar size.
1967 if (Opcode == TargetOpcode::G_USHLSAT &&
1968 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1969 return false;
1970
1971 return true;
1972}
1973
1975 RegisterImmPair &MatchInfo) const {
1976 unsigned Opcode = MI.getOpcode();
1977 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1978 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1979 Opcode == TargetOpcode::G_USHLSAT) &&
1980 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1981
1982 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1983 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1984 auto Imm = MatchInfo.Imm;
1985
1986 if (Imm >= ScalarSizeInBits) {
1987 // Any logical shift that exceeds scalar size will produce zero.
1988 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1989 Builder.buildConstant(MI.getOperand(0), 0);
1990 MI.eraseFromParent();
1991 return;
1992 }
1993 // Arithmetic shift and saturating signed left shift have no effect beyond
1994 // scalar size.
1995 Imm = ScalarSizeInBits - 1;
1996 }
1997
1998 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1999 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
2000 Observer.changingInstr(MI);
2001 MI.getOperand(1).setReg(MatchInfo.Reg);
2002 MI.getOperand(2).setReg(NewImm);
2003 Observer.changedInstr(MI);
2004}
2005
2007 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
2008 // We're trying to match the following pattern with any of
2009 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
2010 // with any of G_AND/G_OR/G_XOR logic instructions.
2011 // %t1 = SHIFT %X, G_CONSTANT C0
2012 // %t2 = LOGIC %t1, %Y
2013 // %root = SHIFT %t2, G_CONSTANT C1
2014 // -->
2015 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
2016 // %t4 = SHIFT %Y, G_CONSTANT C1
2017 // %root = LOGIC %t3, %t4
2018 unsigned ShiftOpcode = MI.getOpcode();
2019 assert((ShiftOpcode == TargetOpcode::G_SHL ||
2020 ShiftOpcode == TargetOpcode::G_ASHR ||
2021 ShiftOpcode == TargetOpcode::G_LSHR ||
2022 ShiftOpcode == TargetOpcode::G_USHLSAT ||
2023 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
2024 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2025
2026 // Match a one-use bitwise logic op.
2027 Register LogicDest = MI.getOperand(1).getReg();
2028 if (!MRI.hasOneNonDBGUse(LogicDest))
2029 return false;
2030
2031 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
2032 unsigned LogicOpcode = LogicMI->getOpcode();
2033 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
2034 LogicOpcode != TargetOpcode::G_XOR)
2035 return false;
2036
2037 // Find a matching one-use shift by constant.
2038 const Register C1 = MI.getOperand(2).getReg();
2039 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
2040 if (!MaybeImmVal || MaybeImmVal->Value == 0)
2041 return false;
2042
2043 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
2044
2045 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
2046 // Shift should match previous one and should be a one-use.
2047 if (MI->getOpcode() != ShiftOpcode ||
2048 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2049 return false;
2050
2051 // Must be a constant.
2052 auto MaybeImmVal =
2053 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
2054 if (!MaybeImmVal)
2055 return false;
2056
2057 ShiftVal = MaybeImmVal->Value.getSExtValue();
2058 return true;
2059 };
2060
2061 // Logic ops are commutative, so check each operand for a match.
2062 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
2063 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
2064 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
2065 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
2066 uint64_t C0Val;
2067
2068 if (matchFirstShift(LogicMIOp1, C0Val)) {
2069 MatchInfo.LogicNonShiftReg = LogicMIReg2;
2070 MatchInfo.Shift2 = LogicMIOp1;
2071 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
2072 MatchInfo.LogicNonShiftReg = LogicMIReg1;
2073 MatchInfo.Shift2 = LogicMIOp2;
2074 } else
2075 return false;
2076
2077 MatchInfo.ValSum = C0Val + C1Val;
2078
2079 // The fold is not valid if the sum of the shift values exceeds bitwidth.
2080 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
2081 return false;
2082
2083 MatchInfo.Logic = LogicMI;
2084 return true;
2085}
2086
2088 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
2089 unsigned Opcode = MI.getOpcode();
2090 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
2091 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
2092 Opcode == TargetOpcode::G_SSHLSAT) &&
2093 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2094
2095 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
2096 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
2097
2098 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
2099
2100 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
2101 Register Shift1 =
2102 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
2103
2104 // If LogicNonShiftReg is the same to Shift1Base, and shift1 const is the same
2105 // to MatchInfo.Shift2 const, CSEMIRBuilder will reuse the old shift1 when
2106 // build shift2. So, if we erase MatchInfo.Shift2 at the end, actually we
2107 // remove old shift1. And it will cause crash later. So erase it earlier to
2108 // avoid the crash.
2109 MatchInfo.Shift2->eraseFromParent();
2110
2111 Register Shift2Const = MI.getOperand(2).getReg();
2112 Register Shift2 = Builder
2113 .buildInstr(Opcode, {DestType},
2114 {MatchInfo.LogicNonShiftReg, Shift2Const})
2115 .getReg(0);
2116
2117 Register Dest = MI.getOperand(0).getReg();
2118 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2119
2120 // This was one use so it's safe to remove it.
2121 MatchInfo.Logic->eraseFromParent();
2122
2123 MI.eraseFromParent();
2124}
2125
2127 BuildFnTy &MatchInfo) const {
2128 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2129 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2130 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2131 auto &Shl = cast<GenericMachineInstr>(MI);
2132 Register DstReg = Shl.getReg(0);
2133 Register SrcReg = Shl.getReg(1);
2134 Register ShiftReg = Shl.getReg(2);
2135 Register X, C1;
2136
2137 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2138 return false;
2139
2140 if (!mi_match(SrcReg, MRI,
2142 m_GOr(m_Reg(X), m_Reg(C1))))))
2143 return false;
2144
2145 APInt C1Val, C2Val;
2146 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2147 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2148 return false;
2149
2150 auto *SrcDef = MRI.getVRegDef(SrcReg);
2151 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2152 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2153 LLT SrcTy = MRI.getType(SrcReg);
2154 MatchInfo = [=](MachineIRBuilder &B) {
2155 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2156 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2157 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2158 };
2159 return true;
2160}
2161
2163 LshrOfTruncOfLshr &MatchInfo,
2164 MachineInstr &ShiftMI) const {
2165 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2166
2167 Register N0 = MI.getOperand(1).getReg();
2168 Register N1 = MI.getOperand(2).getReg();
2169 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2170
2171 APInt N1C, N001C;
2172 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2173 return false;
2174 auto N001 = ShiftMI.getOperand(2).getReg();
2175 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2176 return false;
2177
2178 if (N001C.getBitWidth() > N1C.getBitWidth())
2179 N1C = N1C.zext(N001C.getBitWidth());
2180 else
2181 N001C = N001C.zext(N1C.getBitWidth());
2182
2183 Register InnerShift = ShiftMI.getOperand(0).getReg();
2184 LLT InnerShiftTy = MRI.getType(InnerShift);
2185 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2186 if ((N1C + N001C).ult(InnerShiftSize)) {
2187 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2188 MatchInfo.ShiftAmt = N1C + N001C;
2189 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2190 MatchInfo.InnerShiftTy = InnerShiftTy;
2191
2192 if ((N001C + OpSizeInBits) == InnerShiftSize)
2193 return true;
2194 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2195 MatchInfo.Mask = true;
2196 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2197 return true;
2198 }
2199 }
2200 return false;
2201}
2202
2204 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
2205 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2206
2207 Register Dst = MI.getOperand(0).getReg();
2208 auto ShiftAmt =
2209 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2210 auto Shift =
2211 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2212 if (MatchInfo.Mask == true) {
2213 APInt MaskVal =
2215 MatchInfo.MaskVal.getZExtValue());
2216 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2217 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2218 Builder.buildTrunc(Dst, And);
2219 } else
2220 Builder.buildTrunc(Dst, Shift);
2221 MI.eraseFromParent();
2222}
2223
2225 unsigned &ShiftVal) const {
2226 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2227 auto MaybeImmVal =
2228 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2229 if (!MaybeImmVal)
2230 return false;
2231
2232 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2233 return (static_cast<int32_t>(ShiftVal) != -1);
2234}
2235
2237 unsigned &ShiftVal) const {
2238 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2239 MachineIRBuilder MIB(MI);
2240 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2241 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2242 Observer.changingInstr(MI);
2243 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2244 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2245 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2247 Observer.changedInstr(MI);
2248}
2249
2251 BuildFnTy &MatchInfo) const {
2252 GSub &Sub = cast<GSub>(MI);
2253
2254 LLT Ty = MRI.getType(Sub.getReg(0));
2255
2256 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2257 return false;
2258
2260 return false;
2261
2262 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2263
2264 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2265 auto NegCst = B.buildConstant(Ty, -Imm);
2266 Observer.changingInstr(MI);
2267 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2268 MI.getOperand(2).setReg(NegCst.getReg(0));
2270 if (Imm.isMinSignedValue())
2272 Observer.changedInstr(MI);
2273 };
2274 return true;
2275}
2276
2277// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
2279 RegisterImmPair &MatchData) const {
2280 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2281 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2282 return false;
2283
2284 Register LHS = MI.getOperand(1).getReg();
2285
2286 Register ExtSrc;
2287 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2288 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2289 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2290 return false;
2291
2292 Register RHS = MI.getOperand(2).getReg();
2293 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2294 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2295 if (!MaybeShiftAmtVal)
2296 return false;
2297
2298 if (LI) {
2299 LLT SrcTy = MRI.getType(ExtSrc);
2300
2301 // We only really care about the legality with the shifted value. We can
2302 // pick any type the constant shift amount, so ask the target what to
2303 // use. Otherwise we would have to guess and hope it is reported as legal.
2304 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2305 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2306 return false;
2307 }
2308
2309 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2310 MatchData.Reg = ExtSrc;
2311 MatchData.Imm = ShiftAmt;
2312
2313 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2314 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2315 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2316}
2317
2319 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2320 Register ExtSrcReg = MatchData.Reg;
2321 int64_t ShiftAmtVal = MatchData.Imm;
2322
2323 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2324 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2325 auto NarrowShift =
2326 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2327 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2328 MI.eraseFromParent();
2329}
2330
2332 Register &MatchInfo) const {
2334 SmallVector<Register, 16> MergedValues;
2335 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2336 MergedValues.emplace_back(Merge.getSourceReg(I));
2337
2338 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2339 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2340 return false;
2341
2342 for (unsigned I = 0; I < MergedValues.size(); ++I)
2343 if (MergedValues[I] != Unmerge->getReg(I))
2344 return false;
2345
2346 MatchInfo = Unmerge->getSourceReg();
2347 return true;
2348}
2349
2351 const MachineRegisterInfo &MRI) {
2352 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2353 ;
2354
2355 return Reg;
2356}
2357
2359 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2360 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2361 "Expected an unmerge");
2362 auto &Unmerge = cast<GUnmerge>(MI);
2363 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2364
2365 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2366 if (!SrcInstr)
2367 return false;
2368
2369 // Check the source type of the merge.
2370 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2371 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2372 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2373 if (SrcMergeTy != Dst0Ty && !SameSize)
2374 return false;
2375 // They are the same now (modulo a bitcast).
2376 // We can collect all the src registers.
2377 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2378 Operands.push_back(SrcInstr->getSourceReg(Idx));
2379 return true;
2380}
2381
2383 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2384 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2385 "Expected an unmerge");
2386 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2387 "Not enough operands to replace all defs");
2388 unsigned NumElems = MI.getNumOperands() - 1;
2389
2390 LLT SrcTy = MRI.getType(Operands[0]);
2391 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2392 bool CanReuseInputDirectly = DstTy == SrcTy;
2393 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2394 Register DstReg = MI.getOperand(Idx).getReg();
2395 Register SrcReg = Operands[Idx];
2396
2397 // This combine may run after RegBankSelect, so we need to be aware of
2398 // register banks.
2399 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2400 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2401 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2402 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2403 }
2404
2405 if (CanReuseInputDirectly)
2406 replaceRegWith(MRI, DstReg, SrcReg);
2407 else
2408 Builder.buildCast(DstReg, SrcReg);
2409 }
2410 MI.eraseFromParent();
2411}
2412
2414 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2415 unsigned SrcIdx = MI.getNumOperands() - 1;
2416 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2417 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2418 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2419 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2420 return false;
2421 // Break down the big constant in smaller ones.
2422 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2423 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2424 ? CstVal.getCImm()->getValue()
2425 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2426
2427 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2428 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2429 // Unmerge a constant.
2430 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2431 Csts.emplace_back(Val.trunc(ShiftAmt));
2432 Val = Val.lshr(ShiftAmt);
2433 }
2434
2435 return true;
2436}
2437
2439 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2440 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2441 "Expected an unmerge");
2442 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2443 "Not enough operands to replace all defs");
2444 unsigned NumElems = MI.getNumOperands() - 1;
2445 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2446 Register DstReg = MI.getOperand(Idx).getReg();
2447 Builder.buildConstant(DstReg, Csts[Idx]);
2448 }
2449
2450 MI.eraseFromParent();
2451}
2452
2455 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2456 unsigned SrcIdx = MI.getNumOperands() - 1;
2457 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2458 MatchInfo = [&MI](MachineIRBuilder &B) {
2459 unsigned NumElems = MI.getNumOperands() - 1;
2460 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2461 Register DstReg = MI.getOperand(Idx).getReg();
2462 B.buildUndef(DstReg);
2463 }
2464 };
2465 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2466}
2467
2469 MachineInstr &MI) const {
2470 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2471 "Expected an unmerge");
2472 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2473 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2474 return false;
2475 // Check that all the lanes are dead except the first one.
2476 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2477 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2478 return false;
2479 }
2480 return true;
2481}
2482
2484 MachineInstr &MI) const {
2485 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2486 Register Dst0Reg = MI.getOperand(0).getReg();
2487 Builder.buildTrunc(Dst0Reg, SrcReg);
2488 MI.eraseFromParent();
2489}
2490
2492 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2493 "Expected an unmerge");
2494 Register Dst0Reg = MI.getOperand(0).getReg();
2495 LLT Dst0Ty = MRI.getType(Dst0Reg);
2496 // G_ZEXT on vector applies to each lane, so it will
2497 // affect all destinations. Therefore we won't be able
2498 // to simplify the unmerge to just the first definition.
2499 if (Dst0Ty.isVector())
2500 return false;
2501 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2502 LLT SrcTy = MRI.getType(SrcReg);
2503 if (SrcTy.isVector())
2504 return false;
2505
2506 Register ZExtSrcReg;
2507 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2508 return false;
2509
2510 // Finally we can replace the first definition with
2511 // a zext of the source if the definition is big enough to hold
2512 // all of ZExtSrc bits.
2513 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2514 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2515}
2516
2518 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2519 "Expected an unmerge");
2520
2521 Register Dst0Reg = MI.getOperand(0).getReg();
2522
2523 MachineInstr *ZExtInstr =
2524 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2525 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2526 "Expecting a G_ZEXT");
2527
2528 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2529 LLT Dst0Ty = MRI.getType(Dst0Reg);
2530 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2531
2532 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2533 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2534 } else {
2535 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2536 "ZExt src doesn't fit in destination");
2537 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2538 }
2539
2540 Register ZeroReg;
2541 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2542 if (!ZeroReg)
2543 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2544 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2545 }
2546 MI.eraseFromParent();
2547}
2548
2550 unsigned TargetShiftSize,
2551 unsigned &ShiftVal) const {
2552 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2553 MI.getOpcode() == TargetOpcode::G_LSHR ||
2554 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2555
2556 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2557 if (Ty.isVector()) // TODO:
2558 return false;
2559
2560 // Don't narrow further than the requested size.
2561 unsigned Size = Ty.getSizeInBits();
2562 if (Size <= TargetShiftSize)
2563 return false;
2564
2565 auto MaybeImmVal =
2566 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2567 if (!MaybeImmVal)
2568 return false;
2569
2570 ShiftVal = MaybeImmVal->Value.getSExtValue();
2571 return ShiftVal >= Size / 2 && ShiftVal < Size;
2572}
2573
2575 MachineInstr &MI, const unsigned &ShiftVal) const {
2576 Register DstReg = MI.getOperand(0).getReg();
2577 Register SrcReg = MI.getOperand(1).getReg();
2578 LLT Ty = MRI.getType(SrcReg);
2579 unsigned Size = Ty.getSizeInBits();
2580 unsigned HalfSize = Size / 2;
2581 assert(ShiftVal >= HalfSize);
2582
2583 LLT HalfTy = LLT::scalar(HalfSize);
2584
2585 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2586 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2587
2588 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2589 Register Narrowed = Unmerge.getReg(1);
2590
2591 // dst = G_LSHR s64:x, C for C >= 32
2592 // =>
2593 // lo, hi = G_UNMERGE_VALUES x
2594 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2595
2596 if (NarrowShiftAmt != 0) {
2597 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2598 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2599 }
2600
2601 auto Zero = Builder.buildConstant(HalfTy, 0);
2602 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2603 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2604 Register Narrowed = Unmerge.getReg(0);
2605 // dst = G_SHL s64:x, C for C >= 32
2606 // =>
2607 // lo, hi = G_UNMERGE_VALUES x
2608 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2609 if (NarrowShiftAmt != 0) {
2610 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2611 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2612 }
2613
2614 auto Zero = Builder.buildConstant(HalfTy, 0);
2615 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2616 } else {
2617 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2618 auto Hi = Builder.buildAShr(
2619 HalfTy, Unmerge.getReg(1),
2620 Builder.buildConstant(HalfTy, HalfSize - 1));
2621
2622 if (ShiftVal == HalfSize) {
2623 // (G_ASHR i64:x, 32) ->
2624 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2625 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2626 } else if (ShiftVal == Size - 1) {
2627 // Don't need a second shift.
2628 // (G_ASHR i64:x, 63) ->
2629 // %narrowed = (G_ASHR hi_32(x), 31)
2630 // G_MERGE_VALUES %narrowed, %narrowed
2631 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2632 } else {
2633 auto Lo = Builder.buildAShr(
2634 HalfTy, Unmerge.getReg(1),
2635 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2636
2637 // (G_ASHR i64:x, C) ->, for C >= 32
2638 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2639 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2640 }
2641 }
2642
2643 MI.eraseFromParent();
2644}
2645
2647 MachineInstr &MI, unsigned TargetShiftAmount) const {
2648 unsigned ShiftAmt;
2649 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2650 applyCombineShiftToUnmerge(MI, ShiftAmt);
2651 return true;
2652 }
2653
2654 return false;
2655}
2656
2658 Register &Reg) const {
2659 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2660 Register DstReg = MI.getOperand(0).getReg();
2661 LLT DstTy = MRI.getType(DstReg);
2662 Register SrcReg = MI.getOperand(1).getReg();
2663 return mi_match(SrcReg, MRI,
2664 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2665}
2666
2668 Register &Reg) const {
2669 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2670 Register DstReg = MI.getOperand(0).getReg();
2671 Builder.buildCopy(DstReg, Reg);
2672 MI.eraseFromParent();
2673}
2674
2676 Register &Reg) const {
2677 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2678 Register DstReg = MI.getOperand(0).getReg();
2679 Builder.buildZExtOrTrunc(DstReg, Reg);
2680 MI.eraseFromParent();
2681}
2682
2684 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2685 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2686 Register LHS = MI.getOperand(1).getReg();
2687 Register RHS = MI.getOperand(2).getReg();
2688 LLT IntTy = MRI.getType(LHS);
2689
2690 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2691 // instruction.
2692 PtrReg.second = false;
2693 for (Register SrcReg : {LHS, RHS}) {
2694 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2695 // Don't handle cases where the integer is implicitly converted to the
2696 // pointer width.
2697 LLT PtrTy = MRI.getType(PtrReg.first);
2698 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2699 return true;
2700 }
2701
2702 PtrReg.second = true;
2703 }
2704
2705 return false;
2706}
2707
2709 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2710 Register Dst = MI.getOperand(0).getReg();
2711 Register LHS = MI.getOperand(1).getReg();
2712 Register RHS = MI.getOperand(2).getReg();
2713
2714 const bool DoCommute = PtrReg.second;
2715 if (DoCommute)
2716 std::swap(LHS, RHS);
2717 LHS = PtrReg.first;
2718
2719 LLT PtrTy = MRI.getType(LHS);
2720
2721 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2722 Builder.buildPtrToInt(Dst, PtrAdd);
2723 MI.eraseFromParent();
2724}
2725
2727 APInt &NewCst) const {
2728 auto &PtrAdd = cast<GPtrAdd>(MI);
2729 Register LHS = PtrAdd.getBaseReg();
2730 Register RHS = PtrAdd.getOffsetReg();
2731 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2732
2733 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2734 APInt Cst;
2735 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2736 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2737 // G_INTTOPTR uses zero-extension
2738 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2739 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2740 return true;
2741 }
2742 }
2743
2744 return false;
2745}
2746
2748 APInt &NewCst) const {
2749 auto &PtrAdd = cast<GPtrAdd>(MI);
2750 Register Dst = PtrAdd.getReg(0);
2751
2752 Builder.buildConstant(Dst, NewCst);
2753 PtrAdd.eraseFromParent();
2754}
2755
2757 Register &Reg) const {
2758 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2759 Register DstReg = MI.getOperand(0).getReg();
2760 Register SrcReg = MI.getOperand(1).getReg();
2761 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2762 if (OriginalSrcReg.isValid())
2763 SrcReg = OriginalSrcReg;
2764 LLT DstTy = MRI.getType(DstReg);
2765 return mi_match(SrcReg, MRI,
2766 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2767 canReplaceReg(DstReg, Reg, MRI);
2768}
2769
2771 Register &Reg) const {
2772 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2773 Register DstReg = MI.getOperand(0).getReg();
2774 Register SrcReg = MI.getOperand(1).getReg();
2775 LLT DstTy = MRI.getType(DstReg);
2776 if (mi_match(SrcReg, MRI,
2777 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2778 canReplaceReg(DstReg, Reg, MRI)) {
2779 unsigned DstSize = DstTy.getScalarSizeInBits();
2780 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2781 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2782 }
2783 return false;
2784}
2785
2787 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2788 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2789
2790 // ShiftTy > 32 > TruncTy -> 32
2791 if (ShiftSize > 32 && TruncSize < 32)
2792 return ShiftTy.changeElementSize(32);
2793
2794 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2795 // Some targets like it, some don't, some only like it under certain
2796 // conditions/processor versions, etc.
2797 // A TL hook might be needed for this.
2798
2799 // Don't combine
2800 return ShiftTy;
2801}
2802
2804 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
// NOTE(review): the line that opens this definition (return type and name) is
// missing from this listing; presumably
// CombinerHelper::matchCombineTruncOfShift -- confirm against the real file.
//
// Matches a G_TRUNC (MI) whose source is a single-use G_SHL, G_LSHR or G_ASHR
// that can be performed in a narrower type. On success, MatchInfo holds the
// shift instruction and the type the narrowed shift should use.
2805 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2806 Register DstReg = MI.getOperand(0).getReg();
2807 Register SrcReg = MI.getOperand(1).getReg();
2808
// The truncated value must have no other non-debug user.
2809 if (!MRI.hasOneNonDBGUse(SrcReg))
2810 return false;
2811
2812 LLT SrcTy = MRI.getType(SrcReg);
2813 LLT DstTy = MRI.getType(DstReg);
2814
2815 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2816 const auto &TL = getTargetLowering();
2817
2818 LLT NewShiftTy;
2819 switch (SrcMI->getOpcode()) {
2820 default:
2821 return false;
2822 case TargetOpcode::G_SHL: {
// Left shifts are narrowed all the way down to the trunc's result type.
2823 NewShiftTy = DstTy;
2824
2825 // Make sure new shift amount is legal.
2826 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2827 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2828 return false;
2829 break;
2830 }
2831 case TargetOpcode::G_LSHR:
2832 case TargetOpcode::G_ASHR: {
2833 // For right shifts, we conservatively do not do the transform if the TRUNC
2834 // has any STORE users. The reason is that if we change the type of the
2835 // shift, we may break the truncstore combine.
2836 //
2837 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2838 for (auto &User : MRI.use_instructions(DstReg))
2839 if (User.getOpcode() == TargetOpcode::G_STORE)
2840 return false;
2841
// A result equal to SrcTy means no narrower intermediate type was chosen.
2842 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2843 if (NewShiftTy == SrcTy)
2844 return false;
2845
2846 // Make sure we won't lose information by truncating the high bits.
// The largest possible shift amount may not exceed the number of bits by
// which NewShiftTy is wider than DstTy.
2847 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2848 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2849 DstTy.getScalarSizeInBits()))
2850 return false;
2851 break;
2852 }
2853 }
2854
// NOTE(review): the line that opens the following condition is missing from
// this listing; it presumably tests whether the narrowed shift (same opcode,
// NewShiftTy, preferred shift-amount type) is legal -- confirm.
2856 {SrcMI->getOpcode(),
2857 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2858 return false;
2859
2860 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2861 return true;
2862}
2863
2865 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::applyCombineTruncOfShift -- confirm.
//
// Rebuilds the shift recorded in MatchInfo.first in the type MatchInfo.second:
// the shift source is truncated first, then shifted by the original amount.
2866 MachineInstr *ShiftMI = MatchInfo.first;
2867 LLT NewShiftTy = MatchInfo.second;
2868
2869 Register Dst = MI.getOperand(0).getReg();
2870 LLT DstTy = MRI.getType(Dst);
2871
2872 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2873 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2874 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2875
2876 Register NewShift =
2877 Builder
2878 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2879 .getReg(0);
2880
// If the new shift already has the destination type it replaces Dst directly;
// otherwise one more trunc brings it down to the destination type.
2881 if (NewShiftTy == DstTy)
2882 replaceRegWith(MRI, Dst, NewShift);
2883 else
2884 Builder.buildTrunc(Dst, NewShift);
2885
2886 eraseInst(MI);
2887}
2888
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::matchAnyExplicitUseIsUndef -- confirm.
//
// Returns true if at least one explicit use operand of MI is a register whose
// definition is found to be a G_IMPLICIT_DEF by getOpcodeDef.
2890 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2891 return MO.isReg() &&
2892 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2893 });
2894}
2895
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::matchAllExplicitUsesAreUndef -- confirm.
//
// Returns true if every explicit use operand of MI is either not a register or
// a register whose definition is found to be a G_IMPLICIT_DEF by getOpcodeDef.
2897 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2898 return !MO.isReg() ||
2899 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2900 });
2901}
2902
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::matchUndefShuffleVectorMask -- confirm.
//
// Returns true if every element of the G_SHUFFLE_VECTOR mask (operand 3) is
// negative.
2904 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2905 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2906 return all_of(Mask, [](int Elt) { return Elt < 0; });
2907}
2908
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::matchUndefStore -- confirm.
//
// Returns true if operand 0 of the G_STORE is defined by a G_IMPLICIT_DEF.
2910 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2911 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2912 MRI);
2913}
2914
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::matchUndefSelectCmp -- confirm.
//
// Returns true if operand 1 of the G_SELECT is defined by a G_IMPLICIT_DEF.
2916 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2917 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2918 MRI);
2919}
2920
2922 MachineInstr &MI) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchInsertExtractVecEltOutOfBounds --
// confirm.
//
// Returns true if MI is a G_INSERT_VECTOR_ELT / G_EXTRACT_VECTOR_ELT on a
// non-scalable vector whose index is a known constant that is not less than
// the number of vector elements.
2923 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2924 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2925 "Expected an insert/extract element op");
2926 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2927 if (VecTy.isScalableVector())
2928 return false;
2929
// The index is operand 2 for an extract and operand 3 for an insert.
2930 unsigned IdxIdx =
2931 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2932 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2933 if (!Idx)
2934 return false;
2935 return Idx->getZExtValue() >= VecTy.getNumElements();
2936}
2937
2939 unsigned &OpIdx) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchConstantSelectCmp(MachineInstr &MI,
// ...) -- confirm.
//
// Matches a select whose condition is a constant or constant splat vector.
// On success OpIdx is set to the operand index to keep: 3 when the constant is
// zero, 2 otherwise.
2940 GSelect &SelMI = cast<GSelect>(MI);
2941 auto Cst =
2942 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2943 if (!Cst)
2944 return false;
2945 OpIdx = Cst->isZero() ? 3 : 2;
2946 return true;
2947}
2948
2949void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2950
2952 const MachineOperand &MOP2) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchEqualDefs(const MachineOperand
// &MOP1, ...) -- confirm.
//
// Returns true if the two register operands can be shown to hold the same
// value, looking through copies to the defining instructions. Non-register
// operands never match.
2953 if (!MOP1.isReg() || !MOP2.isReg())
2954 return false;
2955 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2956 if (!InstAndDef1)
2957 return false;
2958 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2959 if (!InstAndDef2)
2960 return false;
2961 MachineInstr *I1 = InstAndDef1->MI;
2962 MachineInstr *I2 = InstAndDef2->MI;
2963
2964 // Handle a case like this:
2965 //
2966 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2967 //
2968 // Even though %0 and %1 are produced by the same instruction they are not
2969 // the same values.
2970 if (I1 == I2)
2971 return MOP1.getReg() == MOP2.getReg();
2972
2973 // If we have an instruction which loads or stores, we can't guarantee that
2974 // it is identical.
2975 //
2976 // For example, we may have
2977 //
2978 // %x1 = G_LOAD %addr (load N from @somewhere)
2979 // ...
2980 // call @foo
2981 // ...
2982 // %x2 = G_LOAD %addr (load N from @somewhere)
2983 // ...
2984 // %or = G_OR %x1, %x2
2985 //
2986 // It's possible that @foo will modify whatever lives at the address we're
2987 // loading from. To be safe, let's just assume that all loads and stores
2988 // are different (unless we have something which is guaranteed to not
2989 // change.)
2990 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2991 return false;
2992
2993 // If both instructions are loads or stores, they are equal only if both
2994 // are dereferenceable invariant loads with the same number of bits.
2995 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
// NOTE(review): the two lines that declare LS1 and LS2 are missing from this
// listing; they presumably derive them from I1 and I2 -- confirm against the
// real file.
2998 if (!LS1 || !LS2)
2999 return false;
3000
3001 if (!I2->isDereferenceableInvariantLoad() ||
3002 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
3003 return false;
3004 }
3005
3006 // Check for physical registers on the instructions first to avoid cases
3007 // like this:
3008 //
3009 // %a = COPY $physreg
3010 // ...
3011 // SOMETHING implicit-def $physreg
3012 // ...
3013 // %b = COPY $physreg
3014 //
3015 // These copies are not equivalent.
3016 if (any_of(I1->uses(), [](const MachineOperand &MO) {
3017 return MO.isReg() && MO.getReg().isPhysical();
3018 })) {
3019 // Check if we have a case like this:
3020 //
3021 // %a = COPY $physreg
3022 // %b = COPY %a
3023 //
3024 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
3025 // From that, we know that they must have the same value, since they must
3026 // have come from the same COPY.
3027 return I1->isIdenticalTo(*I2);
3028 }
3029
3030 // We don't have any physical registers, so we don't necessarily need the
3031 // same vreg defs.
3032 //
3033 // On the off-chance that there's some target instruction feeding into the
3034 // instruction, let's use produceSameValue instead of isIdenticalTo.
3035 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
3036 // Handle instructions with multiple defs that produce same values. Values
3037 // are same for operands with same index.
3038 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
3039 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
3040 // I1 and I2 are different instructions but produce same values,
3041 // %1 and %6 are same, %1 and %7 are not the same value.
3042 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
3043 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
3044 }
3045 return false;
3046}
3047
3049 int64_t C) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchConstantOp(const MachineOperand
// &MOP, ...) -- confirm.
//
// Returns true if MOP is a register defined by a constant or constant splat
// vector that is at most 64 bits wide and whose sign-extended value equals C.
3050 if (!MOP.isReg())
3051 return false;
3052 auto *MI = MRI.getVRegDef(MOP.getReg());
3053 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
// The width check comes first so getSExtValue() is only reached for values of
// at most 64 bits.
3054 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
3055 MaybeCst->getSExtValue() == C;
3056}
3057
3059 double C) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchConstantFPOp(const MachineOperand
// &MOP, ...) -- confirm.
//
// Returns true if MOP is a register matched by m_GFCstOrSplat whose value is
// exactly C.
3060 if (!MOP.isReg())
3061 return false;
3062 std::optional<FPValueAndVReg> MaybeCst;
3063 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
3064 return false;
3065
3066 return MaybeCst->Value.isExactlyValue(C);
3067}
3068
3070 unsigned OpIdx) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::replaceSingleDefInstWithOperand(
// MachineInstr &MI, ...) -- confirm.
//
// Replaces the single def of MI (operand 0) with the register in operand OpIdx
// of MI, then erases MI. Asserts that the replacement is allowed.
3071 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3072 Register OldReg = MI.getOperand(0).getReg();
3073 Register Replacement = MI.getOperand(OpIdx).getReg();
3074 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3075 replaceRegWith(MRI, OldReg, Replacement);
3076 MI.eraseFromParent();
3077}
3078
3080 Register Replacement) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::replaceSingleDefInstWithReg(
// MachineInstr &MI, ...) -- confirm.
//
// Replaces the single def of MI (operand 0) with Replacement, then erases MI.
// Asserts that the replacement is allowed.
3081 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3082 Register OldReg = MI.getOperand(0).getReg();
3083 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3084 replaceRegWith(MRI, OldReg, Replacement);
3085 MI.eraseFromParent();
3086}
3087
3089 unsigned ConstIdx) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchConstantLargerBitWidth(
// MachineInstr &MI, ...) -- confirm.
//
// Returns true if operand ConstIdx of MI is a known integer constant that is
// unsigned-greater-or-equal to the size in bits of MI's result type.
3090 Register ConstReg = MI.getOperand(ConstIdx).getReg();
3091 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3092
3093 // Get the shift amount
3094 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3095 if (!VRegAndVal)
3096 return false;
3097
3098 // Return true if shift amount >= Bitwidth
3099 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
3100}
3101
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::applyFunnelShiftConstantModulo --
// confirm.
//
// Rebuilds a G_FSHL / G_FSHR with its constant shift amount (operand 3)
// reduced modulo the scalar bit width of the result type, then erases MI.
3103 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
3104 MI.getOpcode() == TargetOpcode::G_FSHR) &&
3105 "This is not a funnel shift operation");
3106
3107 Register ConstReg = MI.getOperand(3).getReg();
3108 LLT ConstTy = MRI.getType(ConstReg);
3109 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3110
// The shift amount is required to be a constant here; this is only asserted.
3111 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3112 assert((VRegAndVal) && "Value is not a constant");
3113
3114 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
3115 APInt NewConst = VRegAndVal->Value.urem(
3116 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3117
3118 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3119 Builder.buildInstr(
3120 MI.getOpcode(), {MI.getOperand(0)},
3121 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3122
3123 MI.eraseFromParent();
3124}
3125
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::matchSelectSameVal -- confirm.
//
// Returns true if both value operands (2 and 3) of the G_SELECT are equal per
// matchEqualDefs and operand 2 may replace the result register.
3127 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3128 // Match (cond ? x : x)
3129 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3130 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3131 MRI);
3132}
3133
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::matchBinOpSameVal -- confirm.
//
// Returns true if operands 1 and 2 of MI are equal per matchEqualDefs and
// operand 1 may replace the result register.
3135 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3136 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3137 MRI);
3138}
3139
3141 unsigned OpIdx) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchOperandIsUndef(MachineInstr &MI,
// ...) -- confirm.
//
// Returns true if operand OpIdx of MI is a register defined by a
// G_IMPLICIT_DEF.
3142 MachineOperand &MO = MI.getOperand(OpIdx);
3143 return MO.isReg() &&
3144 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3145}
3146
3148 const MachineOperand &MO, bool OrNegative) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo --
// confirm.
//
// Forwards to isKnownToBeAPowerOfTwo for the register of MO, passing MRI, VT
// and OrNegative through unchanged.
3149 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT, OrNegative);
3150}
3151
3153 double C) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::replaceInstWithFConstant(MachineInstr
// &MI, ...) -- confirm.
//
// Builds a floating-point constant C into MI's result operand and erases MI.
3154 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3155 Builder.buildFConstant(MI.getOperand(0), C);
3156 MI.eraseFromParent();
3157}
3158
3160 int64_t C) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::replaceInstWithConstant(MachineInstr
// &MI, ...) -- confirm.
//
// Builds an integer constant C into MI's result operand and erases MI.
3161 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3162 Builder.buildConstant(MI.getOperand(0), C);
3163 MI.eraseFromParent();
3164}
3165
// NOTE(review): the signature line of this definition is missing from this
// listing, so the type of C is not visible here; presumably the APInt overload
// of CombinerHelper::replaceInstWithConstant -- confirm.
//
// Builds the constant C into MI's result operand and erases MI.
3167 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3168 Builder.buildConstant(MI.getOperand(0), C);
3169 MI.eraseFromParent();
3170}
3171
3173 ConstantFP *CFP) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::replaceInstWithFConstant(MachineInstr
// &MI, ...) -- confirm.
//
// Builds a floating-point constant with the APFloat value of CFP into MI's
// result operand and erases MI.
3174 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3175 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3176 MI.eraseFromParent();
3177}
3178
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::replaceInstWithUndef -- confirm.
//
// Builds an undef value into MI's result operand and erases MI.
3180 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3181 Builder.buildUndef(MI.getOperand(0));
3182 MI.eraseFromParent();
3183}
3184
3186 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchSimplifyAddToSub -- confirm.
//
// Matches a binary instruction where one of operands 1/2 is a negation
// (m_Neg). On success MatchInfo holds (other operand, negated value), i.e. the
// LHS and RHS of the equivalent subtraction.
3187 Register LHS = MI.getOperand(1).getReg();
3188 Register RHS = MI.getOperand(2).getReg();
// NewLHS / NewRHS alias the tuple elements, so the lambda below writes the
// result straight into MatchInfo.
3189 Register &NewLHS = std::get<0>(MatchInfo);
3190 Register &NewRHS = std::get<1>(MatchInfo);
3191
3192 // Helper lambda to check for opportunities for
3193 // ((0-A) + B) -> B - A
3194 // (A + (0-B)) -> A - B
3195 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3196 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3197 return false;
3198 NewLHS = MaybeNewLHS;
3199 return true;
3200 };
3201
3202 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3203}
3204
3206 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchCombineInsertVecElts -- confirm.
//
// Walks a chain of constant-index G_INSERT_VECTOR_ELT instructions ending at
// MI and collects, per element index, the register inserted there. MatchInfo
// gets one slot per vector element; slots never written stay invalid.
3207 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3208 "Invalid opcode");
3209 Register DstReg = MI.getOperand(0).getReg();
3210 LLT DstTy = MRI.getType(DstReg);
3211 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3212
3213 if (DstTy.isScalableVector())
3214 return false;
3215
3216 unsigned NumElts = DstTy.getNumElements();
3217 // If this MI is part of a sequence of insert_vec_elts, then
3218 // don't do the combine in the middle of the sequence.
3219 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3220 TargetOpcode::G_INSERT_VECTOR_ELT)
3221 return false;
3222 MachineInstr *CurrInst = &MI;
3223 MachineInstr *TmpInst;
3224 int64_t IntImm;
3225 Register TmpReg;
3226 MatchInfo.resize(NumElts);
// Each iteration matches one insert and then steps to the instruction bound by
// m_MInstr. The walk starts at MI, so when the same index is inserted more than
// once the insert nearest to MI is the one recorded.
3227 while (mi_match(
3228 CurrInst->getOperand(0).getReg(), MRI,
3229 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3230 if (IntImm >= NumElts || IntImm < 0)
3231 return false;
3232 if (!MatchInfo[IntImm])
3233 MatchInfo[IntImm] = TmpReg;
3234 CurrInst = TmpInst;
3235 }
3236 // Variable index.
// The loop stopped on an insert it could not match, so CurrInst is still a
// G_INSERT_VECTOR_ELT. This also covers the case where MI itself did not match
// and TmpInst was never assigned.
3237 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3238 return false;
// A chain rooted at a G_BUILD_VECTOR supplies the elements no insert overwrote.
3239 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3240 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3241 if (!MatchInfo[I - 1].isValid())
3242 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3243 }
3244 return true;
3245 }
3246 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3247 // overwritten, bail out.
3248 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3249 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3250}
3251
3253 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::applyCombineInsertVecElts -- confirm.
//
// Replaces MI with a single build-vector of the registers in MatchInfo. Every
// invalid slot is filled with one shared undef of the result's scalar type.
3254 Register UndefReg;
// Creates the undef on first use and reuses it afterwards.
3255 auto GetUndef = [&]() {
3256 if (UndefReg)
3257 return UndefReg;
3258 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3259 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3260 return UndefReg;
3261 };
3262 for (Register &Reg : MatchInfo) {
3263 if (!Reg)
3264 Reg = GetUndef();
3265 }
3266 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3267 MI.eraseFromParent();
3268}
3269
3271 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::applySimplifyAddToSub -- confirm.
//
// Builds a subtraction of the two registers in MatchInfo (first - second)
// into MI's result register and erases MI.
3272 Register SubLHS, SubRHS;
3273 std::tie(SubLHS, SubRHS) = MatchInfo;
3274 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3275 MI.eraseFromParent();
3276}
3277
3278bool CombinerHelper::matchBinopWithNegInner(Register MInner, Register Other,
3279 unsigned RootOpc, Register Dst,
3280 LLT Ty,
3281 BuildFnTy &MatchInfo) const {
3282 /// Helper function for matchBinopWithNeg: tries to match one commuted form
3283 /// of `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`.
3284 MachineInstr *InnerDef = MRI.getVRegDef(MInner);
3285 if (!InnerDef)
3286 return false;
3287
3288 unsigned InnerOpc = InnerDef->getOpcode();
3289 if (InnerOpc != TargetOpcode::G_ADD && InnerOpc != TargetOpcode::G_SUB)
3290 return false;
3291
3292 if (!MRI.hasOneNonDBGUse(MInner))
3293 return false;
3294
3295 Register InnerLHS = InnerDef->getOperand(1).getReg();
3296 Register InnerRHS = InnerDef->getOperand(2).getReg();
3297 Register NotSrc;
3298 Register B, C;
3299
3300 // Check if either operand is ~b
3301 auto TryMatch = [&](Register MaybeNot, Register Other) {
3302 if (mi_match(MaybeNot, MRI, m_Not(m_Reg(NotSrc)))) {
3303 if (!MRI.hasOneNonDBGUse(MaybeNot))
3304 return false;
3305 B = NotSrc;
3306 C = Other;
3307 return true;
3308 }
3309 return false;
3310 };
3311
3312 // For SUB, the not must be the LHS. For ADD, it can be either operand.
3313 if (!TryMatch(InnerLHS, InnerRHS) &&
3314 !(InnerOpc == TargetOpcode::G_ADD && TryMatch(InnerRHS, InnerLHS)))
3315 return false;
3316
3317 // Flip add/sub
3318 unsigned FlippedOpc = (InnerOpc == TargetOpcode::G_ADD) ? TargetOpcode::G_SUB
3319 : TargetOpcode::G_ADD;
3320
3321 Register A = Other;
3322 MatchInfo = [=](MachineIRBuilder &Builder) {
3323 auto NewInner = Builder.buildInstr(FlippedOpc, {Ty}, {B, C});
3324 auto NewNot = Builder.buildNot(Ty, NewInner);
3325 Builder.buildInstr(RootOpc, {Dst}, {A, NewNot});
3326 };
3327 return true;
3328}
3329
3331 BuildFnTy &MatchInfo) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchBinopWithNeg(MachineInstr &MI, ...)
// -- confirm.
3332 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
3333 // Root MI is one of G_AND, G_OR, G_XOR.
3334 // We also look for commuted forms of operations. The pattern is not applied
3335 // if the inner operation has more than one (non-debug) use.
3336
3337 unsigned RootOpc = MI.getOpcode();
3338 Register Dst = MI.getOperand(0).getReg();
3339 LLT Ty = MRI.getType(Dst);
3340
3341 Register LHS = MI.getOperand(1).getReg();
3342 Register RHS = MI.getOperand(2).getReg();
3343 // Check the commuted and uncommuted forms of the operation.
3344 return matchBinopWithNegInner(LHS, RHS, RootOpc, Dst, Ty, MatchInfo) ||
3345 matchBinopWithNegInner(RHS, LHS, RootOpc, Dst, Ty, MatchInfo);
3346}
3347
3349 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchHoistLogicOpWithSameOpcodeHands --
// confirm.
3350 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3351 //
3352 // Creates the new hand + logic instruction (but does not insert them.)
3353 //
3354 // On success, MatchInfo is populated with the new instructions. These are
3355 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3356 unsigned LogicOpcode = MI.getOpcode();
3357 assert(LogicOpcode == TargetOpcode::G_AND ||
3358 LogicOpcode == TargetOpcode::G_OR ||
3359 LogicOpcode == TargetOpcode::G_XOR);
// NOTE(review): this local MIB is not referenced again in the visible body;
// the lambdas below take their own MIB parameter.
3360 MachineIRBuilder MIB(MI);
3361 Register Dst = MI.getOperand(0).getReg();
3362 Register LHSReg = MI.getOperand(1).getReg();
3363 Register RHSReg = MI.getOperand(2).getReg();
3364
3365 // Don't recompute anything.
3366 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3367 return false;
3368
3369 // Make sure we have (hand x, ...), (hand y, ...)
3370 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3371 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3372 if (!LeftHandInst || !RightHandInst)
3373 return false;
3374 unsigned HandOpcode = LeftHandInst->getOpcode();
3375 if (HandOpcode != RightHandInst->getOpcode())
3376 return false;
// Both hands need a register as operand 1; it is read as x / y below.
3377 if (LeftHandInst->getNumOperands() < 2 ||
3378 !LeftHandInst->getOperand(1).isReg() ||
3379 RightHandInst->getNumOperands() < 2 ||
3380 !RightHandInst->getOperand(1).isReg())
3381 return false;
3382
3383 // Make sure the types match up, and if we're doing this post-legalization,
3384 // we end up with legal types.
3385 Register X = LeftHandInst->getOperand(1).getReg();
3386 Register Y = RightHandInst->getOperand(1).getReg();
3387 LLT XTy = MRI.getType(X);
3388 LLT YTy = MRI.getType(Y);
3389 if (!XTy.isValid() || XTy != YTy)
3390 return false;
3391
3392 // Optional extra source register.
3393 Register ExtraHandOpSrcReg;
3394 switch (HandOpcode) {
3395 default:
3396 return false;
3397 case TargetOpcode::G_ANYEXT:
3398 case TargetOpcode::G_SEXT:
3399 case TargetOpcode::G_ZEXT: {
3400 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3401 break;
3402 }
3403 case TargetOpcode::G_TRUNC: {
3404 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3405 const MachineFunction *MF = MI.getMF();
3406 LLVMContext &Ctx = MF->getFunction().getContext();
3407
3408 LLT DstTy = MRI.getType(Dst);
3409 const TargetLowering &TLI = getTargetLowering();
3410
3411 // Be extra careful sinking truncate. If it's free, there's no benefit in
3412 // widening a binop.
3413 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3414 return false;
3415 break;
3416 }
3417 case TargetOpcode::G_AND:
3418 case TargetOpcode::G_ASHR:
3419 case TargetOpcode::G_LSHR:
3420 case TargetOpcode::G_SHL: {
3421 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3422 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3423 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3424 return false;
3425 ExtraHandOpSrcReg = ZOp.getReg();
3426 break;
3427 }
3428 }
3429
3430 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3431 return false;
3432
3433 // Record the steps to build the new instructions.
3434 //
3435 // Steps to build (logic x, y)
3436 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3437 OperandBuildSteps LogicBuildSteps = {
3438 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3439 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3440 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3441 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3442
3443 // Steps to build hand (logic x, y), ...z
3444 OperandBuildSteps HandBuildSteps = {
3445 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3446 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3447 if (ExtraHandOpSrcReg.isValid())
3448 HandBuildSteps.push_back(
3449 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3450 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3451
// The logic instruction is listed before the hand instruction that consumes
// its result.
3452 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3453 return true;
3454}
3455
3457 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::applyBuildInstructionSteps -- confirm.
//
// Builds every instruction described in MatchInfo.InstrsToBuild, in order, by
// creating an instruction with the recorded opcode and running each recorded
// operand function on it. MI is erased afterwards.
3458 assert(MatchInfo.InstrsToBuild.size() &&
3459 "Expected at least one instr to build?");
3460 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3461 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3462 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3463 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3464 for (auto &OperandFn : InstrToBuild.OperandFns)
3465 OperandFn(Instr);
3466 }
3467 MI.eraseFromParent();
3468}
3469
3471 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchAshrShlToSextInreg -- confirm.
//
// Matches ashr(shl(Src, C), C) with the same constant (or splat) C on both
// shifts. On success MatchInfo holds (Src, C).
3472 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3473 int64_t ShlCst, AshrCst;
3474 Register Src;
3475 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3476 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3477 m_ICstOrSplat(AshrCst))))
3478 return false;
3479 if (ShlCst != AshrCst)
3480 return false;
// NOTE(review): the line that opens the following condition is missing from
// this listing; it presumably checks the legality of G_SEXT_INREG on Src's
// type -- confirm.
3482 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3483 return false;
3484 MatchInfo = std::make_tuple(Src, ShlCst);
3485 return true;
3486}
3487
3489 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::applyAshShlToSextInreg -- confirm.
//
// Replaces the G_ASHR with a sign-extend-in-register of the source recorded in
// MatchInfo. The extension width is the source's scalar size minus the
// recorded shift amount.
3490 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3491 Register Src;
3492 int64_t ShiftAmt;
3493 std::tie(Src, ShiftAmt) = MatchInfo;
3494 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3495 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3496 MI.eraseFromParent();
3497}
3498
3499/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3502 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the lines that open this definition are missing from this
// listing; presumably CombinerHelper::matchOverlappingAnd(MachineInstr &MI,
// ...) -- confirm.
//
// Matches and(and(R, C1), C2) with integer constants C1 and C2. On success
// MatchInfo is set to a function that emits the replacement.
3503 assert(MI.getOpcode() == TargetOpcode::G_AND);
3504
3505 Register Dst = MI.getOperand(0).getReg();
3506 LLT Ty = MRI.getType(Dst);
3507
3508 Register R;
3509 int64_t C1;
3510 int64_t C2;
3511 if (!mi_match(
3512 Dst, MRI,
3513 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3514 return false;
3515
// Values are captured by copy, so the stored function does not refer to this
// function's locals.
3516 MatchInfo = [=](MachineIRBuilder &B) {
// The masks share at least one bit: a single and with the combined mask.
3517 if (C1 & C2) {
3518 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3519 return;
3520 }
// The masks share no bit: Dst is replaced by a zero constant.
3521 auto Zero = B.buildConstant(Ty, 0);
3522 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3523 };
3524 return true;
3525}
3526
3528 Register &Replacement) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchRedundantAnd(MachineInstr &MI, ...)
// -- confirm.
//
// On success, Replacement is set to the G_AND operand that can stand in for
// the G_AND result. Returns false when no known-bits analysis (VT) is
// available.
3529 // Given
3530 //
3531 // %y:_(sN) = G_SOMETHING
3532 // %x:_(sN) = G_SOMETHING
3533 // %res:_(sN) = G_AND %x, %y
3534 //
3535 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3536 //
3537 // Patterns like this can appear as a result of legalization. E.g.
3538 //
3539 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3540 // %one:_(s32) = G_CONSTANT i32 1
3541 // %and:_(s32) = G_AND %cmp, %one
3542 //
3543 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3544 assert(MI.getOpcode() == TargetOpcode::G_AND);
3545 if (!VT)
3546 return false;
3547
3548 Register AndDst = MI.getOperand(0).getReg();
3549 Register LHS = MI.getOperand(1).getReg();
3550 Register RHS = MI.getOperand(2).getReg();
3551
3552 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3553 // we can't do anything. If we do, then it depends on whether we have
3554 // KnownBits on the LHS.
3555 KnownBits RHSBits = VT->getKnownBits(RHS);
3556 if (RHSBits.isUnknown())
3557 return false;
3558
3559 KnownBits LHSBits = VT->getKnownBits(LHS);
3560
3561 // Check that x & Mask == x.
3562 // x & 1 == x, always
3563 // x & 0 == x, only if x is also 0
3564 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3565 //
3566 // Check if we can replace AndDst with the LHS of the G_AND
3567 if (canReplaceReg(AndDst, LHS, MRI) &&
3568 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3569 Replacement = LHS;
3570 return true;
3571 }
3572
3573 // Check if we can replace AndDst with the RHS of the G_AND
3574 if (canReplaceReg(AndDst, RHS, MRI) &&
3575 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3576 Replacement = RHS;
3577 return true;
3578 }
3579
3580 return false;
3581}
3582
3584 Register &Replacement) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchRedundantOr(MachineInstr &MI, ...)
// -- confirm.
//
// On success, Replacement is set to the G_OR operand that can stand in for
// the G_OR result. Returns false when no known-bits analysis (VT) is
// available.
3585 // Given
3586 //
3587 // %y:_(sN) = G_SOMETHING
3588 // %x:_(sN) = G_SOMETHING
3589 // %res:_(sN) = G_OR %x, %y
3590 //
3591 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3592 assert(MI.getOpcode() == TargetOpcode::G_OR);
3593 if (!VT)
3594 return false;
3595
3596 Register OrDst = MI.getOperand(0).getReg();
3597 Register LHS = MI.getOperand(1).getReg();
3598 Register RHS = MI.getOperand(2).getReg();
3599
3600 KnownBits LHSBits = VT->getKnownBits(LHS);
3601 KnownBits RHSBits = VT->getKnownBits(RHS);
3602
3603 // Check that x | Mask == x.
3604 // x | 0 == x, always
3605 // x | 1 == x, only if x is also 1
3606 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3607 //
3608 // Check if we can replace OrDst with the LHS of the G_OR
3609 if (canReplaceReg(OrDst, LHS, MRI) &&
3610 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3611 Replacement = LHS;
3612 return true;
3613 }
3614
3615 // Check if we can replace OrDst with the RHS of the G_OR
3616 if (canReplaceReg(OrDst, RHS, MRI) &&
3617 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3618 Replacement = RHS;
3619 return true;
3620 }
3621
3622 return false;
3623}
3624
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::matchRedundantSExtInReg -- confirm.
//
// Operand 1 is the source register and operand 2 the immediate bit width to
// extend from.
// NOTE(review): VT is dereferenced without a null check here -- confirm VT is
// always set when this is called.
3626 // If the input is already sign extended, just drop the extension.
3627 Register Src = MI.getOperand(1).getReg();
3628 unsigned ExtBits = MI.getOperand(2).getImm();
3629 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
// Redundant when the source already has at least (TypeSize - ExtBits + 1)
// sign bits.
3630 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3631}
3632
3633static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3634 int64_t Cst, bool IsVector, bool IsFP) {
3635 // For i1, Cst will always be -1 regardless of boolean contents.
3636 return (ScalarSizeBits == 1 && Cst == -1) ||
3637 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3638}
3639
3640// This pattern aims to match the following shape to avoid extra mov
3641// instructions
3642// G_BUILD_VECTOR(
3643// G_UNMERGE_VALUES(src, 0)
3644// G_UNMERGE_VALUES(src, 1)
3645// G_IMPLICIT_DEF
3646// G_IMPLICIT_DEF
3647// )
3648// ->
3649// G_CONCAT_VECTORS(
3650// src,
3651// undef
3652// )
3655 Register &UnmergeSrc) const {
// NOTE(review): the lines that open this definition are missing from this
// listing, including the declarations of MI and MRI; presumably
// CombinerHelper::matchCombineBuildUnmerge -- confirm.
//
// Matches a build-vector whose first half consists of all results of one
// unmerge of a vector, in order, and whose second half is all G_IMPLICIT_DEF.
// On success UnmergeSrc is the unmerge's source register. Note that
// UnmergeSrc is written before all checks have passed.
3656 auto &BV = cast<GBuildVector>(MI);
3657
3658 unsigned BuildUseCount = BV.getNumSources();
3659 if (BuildUseCount % 2 != 0)
3660 return false;
3661
3662 unsigned NumUnmerge = BuildUseCount / 2;
3663
3664 auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
3665
3666 // Check the first operand is an unmerge and has the correct number of
3667 // operands
3668 if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
3669 return false;
3670
3671 UnmergeSrc = Unmerge->getSourceReg();
3672
3673 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3674 LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
3675
3676 if (!UnmergeSrcTy.isVector())
3677 return false;
3678
3679 // Ensure we only generate legal instructions post-legalizer
3680 if (!IsPreLegalize &&
3681 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
3682 return false;
3683
3684 // Check that all of the operands before the midpoint come from the same
3685 // unmerge and are in the same order as they are used in the build_vector
3686 for (unsigned I = 0; I < NumUnmerge; ++I) {
3687 auto MaybeUnmergeReg = BV.getSourceReg(I);
3688 auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
3689
3690 if (!LoopUnmerge || LoopUnmerge != Unmerge)
3691 return false;
3692
3693 if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
3694 return false;
3695 }
3696
3697 // Check that all of the unmerged values are used
// NOTE(review): this repeats the getNumDefs() comparison already made right
// after Unmerge was looked up.
3698 if (Unmerge->getNumDefs() != NumUnmerge)
3699 return false;
3700
3701 // Check that all of the operands after the mid point are undefs.
3702 for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
// NOTE(review): the result of getDefIgnoringCopies is dereferenced without a
// null check -- confirm it cannot be null here.
3703 auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
3704
3705 if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
3706 return false;
3707 }
3708
3709 return true;
3710}
3711
3715 Register &UnmergeSrc) const {
// NOTE(review): the lines that open this definition are missing from this
// listing, including the declarations of MI, MRI and B; presumably
// CombinerHelper::applyCombineBuildUnmerge -- confirm.
//
// Replaces MI with a concat-vectors of UnmergeSrc and a freshly built undef
// of the same type as UnmergeSrc, then erases MI.
3716 assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
3717 B.setInstrAndDebugLoc(MI);
3718
3719 Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
3720 B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
3721
3722 MI.eraseFromParent();
3723}
3724
3725// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3726// using vector truncates instead
3727//
3728// EXAMPLE:
3729// %a(s32), %b(s32) = G_UNMERGE_VALUES %src(<2 x s32>)
3730// %T_a(s16) = G_TRUNC %a(s32)
3731// %T_b(s16) = G_TRUNC %b(s32)
3732// %Undef(s16) = G_IMPLICIT_DEF
3733// %dst(<4 x s16>) = G_BUILD_VECTOR %T_a(s16), %T_b(s16), %Undef(s16), %Undef(s16)
3734//
3735// ===>
3736// %Undef(<2 x s32>) = G_IMPLICIT_DEF
3737// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x s32>), %Undef(<2 x s32>)
3738// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3739//
3740// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3742 Register &MatchInfo) const {
// NOTE(review): the line that opens this definition is missing from this
// listing; presumably CombinerHelper::matchUseVectorTruncate(MachineInstr &MI,
// ...) -- confirm.
//
// Matches a build-vector whose leading sources are G_TRUNCs of the in-order
// results of a single G_UNMERGE_VALUES and whose remaining sources are all
// G_IMPLICIT_DEF. On success MatchInfo is the unmerge's source register; it
// may already have been written when a later check fails.
3743 auto BuildMI = cast<GBuildVector>(&MI);
3744 unsigned NumOperands = BuildMI->getNumSources();
3745 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3746
3747 // Check the G_BUILD_VECTOR sources
3748 unsigned I;
3749 MachineInstr *UnmergeMI = nullptr;
3750
3751 // Check all source TRUNCs come from the same UNMERGE instruction
3752 // and that the element order matches (BUILD_VECTOR position I
3753 // corresponds to UNMERGE result I)
3754 for (I = 0; I < NumOperands; ++I) {
3755 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3756 auto SrcMIOpc = SrcMI->getOpcode();
3757
3758 // Check if the G_TRUNC instructions all come from the same MI
3759 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3760 Register TruncSrcReg = SrcMI->getOperand(1).getReg();
3761 if (!UnmergeMI) {
3762 UnmergeMI = MRI.getVRegDef(TruncSrcReg);
3763 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3764 return false;
3765 } else {
3766 auto UnmergeSrcMI = MRI.getVRegDef(TruncSrcReg);
3767 if (UnmergeMI != UnmergeSrcMI)
3768 return false;
3769 }
3770 // Verify element ordering: BUILD_VECTOR position I must use
3771 // UNMERGE result I, otherwise the fold would lose element reordering
3772 if (UnmergeMI->getOperand(I).getReg() != TruncSrcReg)
3773 return false;
3774 } else {
// The first non-trunc source ends the leading run; I keeps its position.
3775 break;
3776 }
3777 }
// Require at least two leading truncs.
3778 if (I < 2)
3779 return false;
3780
3781 // Check the remaining source elements are only G_IMPLICIT_DEF
3782 for (; I < NumOperands; ++I) {
3783 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3784 auto SrcMIOpc = SrcMI->getOpcode();
3785
3786 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3787 return false;
3788 }
3789
3790 // Check the size of unmerge source
3791 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3792 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3793 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3794 return false;
3795
3796 // Check the unmerge source and destination element types match
3797 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3798 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3799 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3800 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3801 return false;
3802
3803 // Only generate legal instructions post-legalizer
3804 if (!IsPreLegalize) {
3805 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3806
// The concat legality is only checked when the element counts differ.
3807 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3808 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3809 return false;
3810
3811 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3812 return false;
3813 }
3814
3815 return true;
3816}
3817
3819 Register &MatchInfo) const {
     // Replaces the G_BUILD_VECTOR in MI with a truncate of the vector register
     // held in MatchInfo. When the destination has more elements than MatchInfo,
     // MatchInfo is first widened by concatenating it with undef vectors of the
     // same type. MI is erased at the end.
3820 Register MidReg;
3821 auto BuildMI = cast<GBuildVector>(&MI);
3822 Register DstReg = BuildMI->getReg(0);
3823 LLT DstTy = MRI.getType(DstReg);
3824 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3825 unsigned DstTyNumElt = DstTy.getNumElements();
3826 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3827
3828 // No need to pad vector if only G_TRUNC is needed
3829 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3830 MidReg = MatchInfo;
3831 } else {
     // One undef register is built and reused for every padding slot.
3832 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
     // MatchInfo occupies the first slot of the concat; the rest are undef.
3833 SmallVector<Register> ConcatRegs = {MatchInfo};
3834 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3835 ConcatRegs.push_back(UndefReg);
3836
     // MidTy has the destination's element count with the source's scalar type.
3837 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3838 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3839 }
3840
3841 Builder.buildTrunc(DstReg, MidReg);
3842 MI.eraseFromParent();
3843}
3844
3846 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
     // Matches a G_XOR whose first source is the root of a tree made only of
     // G_AND, G_OR, G_ICMP and G_FCMP, and whose second source is a constant
     // accepted by isConstValidTrue. On success RegsToNegate holds the root
     // followed by every register reached through the tree, in visit order.
3847 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3848 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3849 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3850 Register XorSrc;
3851 Register CstReg;
3852 // We match xor(src, true) here.
3853 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3854 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3855 return false;
3856
3857 if (!MRI.hasOneNonDBGUse(XorSrc))
3858 return false;
3859
3860 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3861 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3862 // work list of tree nodes to visit.
3863 RegsToNegate.push_back(XorSrc);
3864 // Remember whether the comparisons are all integer or all floating point.
3865 bool IsInt = false;
3866 bool IsFP = false;
     // RegsToNegate grows while it is being iterated, so size() is re-read on
     // every iteration and elements are accessed by index.
3867 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3868 Register Reg = RegsToNegate[I];
     // Every node must have a single non-debug use.
3869 if (!MRI.hasOneNonDBGUse(Reg))
3870 return false;
3871 MachineInstr *Def = MRI.getVRegDef(Reg);
3872 switch (Def->getOpcode()) {
3873 default:
3874 // Don't match if the tree contains anything other than ANDs, ORs and
3875 // comparisons.
3876 return false;
3877 case TargetOpcode::G_ICMP:
     // Mixing integer and floating point comparisons rejects the match.
3878 if (IsFP)
3879 return false;
3880 IsInt = true;
3881 // When we apply the combine we will invert the predicate.
3882 break;
3883 case TargetOpcode::G_FCMP:
3884 if (IsInt)
3885 return false;
3886 IsFP = true;
3887 // When we apply the combine we will invert the predicate.
3888 break;
3889 case TargetOpcode::G_AND:
3890 case TargetOpcode::G_OR:
3891 // Implement De Morgan's laws:
3892 // ~(x & y) -> ~x | ~y
3893 // ~(x | y) -> ~x & ~y
3894 // When we apply the combine we will change the opcode and recursively
3895 // negate the operands.
3896 RegsToNegate.push_back(Def->getOperand(1).getReg());
3897 RegsToNegate.push_back(Def->getOperand(2).getReg());
3898 break;
3899 }
3900 }
3901
3902 // Now we know whether the comparisons are integer or floating point, check
3903 // the constant in the xor.
     // Cst is only written by m_ICst on the scalar path below.
3904 int64_t Cst;
3905 if (Ty.isVector()) {
     // Vector path: the constant operand must be a splat of an integer constant.
3906 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3907 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3908 if (!MaybeCst)
3909 return false;
     // The fourth argument is true here and false on the scalar path; presumably
     // an "is vector" flag -- confirm against isConstValidTrue.
3910 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3911 return false;
3912 } else {
3913 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3914 return false;
3915 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3916 return false;
3917 }
3918
3919 return true;
3920}
3921
3923 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
     // Rewrites the defining instruction of every register in RegsToNegate:
     // comparisons get a new predicate, G_AND becomes G_OR and G_OR becomes
     // G_AND. Afterwards MI's result is replaced by MI's operand 1 and MI is
     // erased.
3924 for (Register Reg : RegsToNegate) {
3925 MachineInstr *Def = MRI.getVRegDef(Reg);
     // The observer is notified before and after each in-place mutation.
3926 Observer.changingInstr(*Def);
3927 // For each comparison, invert the opcode. For each AND and OR, change the
3928 // opcode.
3929 switch (Def->getOpcode()) {
3930 default:
3931 llvm_unreachable("Unexpected opcode");
3932 case TargetOpcode::G_ICMP:
3933 case TargetOpcode::G_FCMP: {
     // The predicate is operand 1 of the comparison.
3934 MachineOperand &PredOp = Def->getOperand(1);
     // NOTE(review): NewP is not defined in the visible text; the embedded
     // listing numbers skip from 3934 to 3937, so its definition appears to be
     // missing from this extract. Presumably it is the inverse of PredOp's
     // current predicate -- confirm against the upstream file.
3937 PredOp.setPredicate(NewP);
3938 break;
3939 }
3940 case TargetOpcode::G_AND:
3941 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3942 break;
3943 case TargetOpcode::G_OR:
3944 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3945 break;
3946 }
3947 Observer.changedInstr(*Def);
3948 }
3949
     // Forward MI's first source operand to all users of MI's result.
3950 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3951 MI.eraseFromParent();
3952}
3953
3955 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3956 // Match (xor (and x, y), y) (or any of its commuted cases)
     // On success MatchInfo.first is the G_AND operand that is not shared with
     // the G_XOR and MatchInfo.second is the shared operand.
3957 assert(MI.getOpcode() == TargetOpcode::G_XOR);
     // X and Y alias the two halves of MatchInfo, so the pattern match below
     // writes its results straight into the output parameter.
3958 Register &X = MatchInfo.first;
3959 Register &Y = MatchInfo.second;
3960 Register AndReg = MI.getOperand(1).getReg();
3961 Register SharedReg = MI.getOperand(2).getReg();
3962
3963 // Find a G_AND on either side of the G_XOR.
3964 // Look for one of
3965 //
3966 // (xor (and x, y), SharedReg)
3967 // (xor SharedReg, (and x, y))
3968 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
     // First operand is not a G_AND: swap roles and try the second operand.
3969 std::swap(AndReg, SharedReg);
3970 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3971 return false;
3972 }
3973
3974 // Only do this if we'll eliminate the G_AND.
3975 if (!MRI.hasOneNonDBGUse(AndReg))
3976 return false;
3977
3978 // We can combine if SharedReg is the same as either the LHS or RHS of the
3979 // G_AND.
     // Normalize so that, if there is a match at all, Y is the shared register.
3980 if (Y != SharedReg)
3981 std::swap(X, Y);
3982 return Y == SharedReg;
3983}
3984
3986 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3987 // Fold (xor (and x, y), y) -> (and (not x), y)
     // MatchInfo supplies (x, y). MI is mutated in place rather than replaced.
3988 Register X, Y;
3989 std::tie(X, Y) = MatchInfo;
     // Build the new "not x" value first, then rewrite MI to consume it.
3990 auto Not = Builder.buildNot(MRI.getType(X), X);
3991 Observer.changingInstr(MI);
3992 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3993 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3994 MI.getOperand(2).setReg(Y);
3995 Observer.changedInstr(MI);
3996}
3997
3999 auto &PtrAdd = cast<GPtrAdd>(MI);
     // Returns true when the base operand of the G_PTR_ADD in MI is known to be
     // zero: a constant 0 for a pointer result, or an all-zeros build vector for
     // a vector result. Non-integral address spaces are always rejected.
4000 Register DstReg = PtrAdd.getReg(0);
4001 LLT Ty = MRI.getType(DstReg);
4002 const DataLayout &DL = Builder.getMF().getDataLayout();
4003
4004 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
4005 return false;
4006
4007 if (Ty.isPointer()) {
     // Scalar pointer: the base must be an integer constant equal to zero.
4008 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
4009 return ConstVal && *ConstVal == 0;
4010 }
4011
4012 assert(Ty.isVector() && "Expecting a vector type");
4013 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
4014 return isBuildVectorAllZeros(*VecMI, MRI);
4015}
4016
4018 auto &PtrAdd = cast<GPtrAdd>(MI);
     // Replaces the G_PTR_ADD with an int-to-pointer conversion of its offset
     // operand, written to the same destination register, then erases it.
4019 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
4020 PtrAdd.eraseFromParent();
4021}
4022
4023 /// The second source operand is known to be a power of 2.
     /// Rewrites MI as an AND of the first source with (second source - 1) and
     /// erases MI. The power-of-2 property is not re-checked here.
4025 Register DstReg = MI.getOperand(0).getReg();
4026 Register Src0 = MI.getOperand(1).getReg();
4027 Register Pow2Src1 = MI.getOperand(2).getReg();
4028 LLT Ty = MRI.getType(DstReg);
4029
4030 // Fold (urem x, pow2) -> (and x, pow2-1)
     // pow2-1 is computed as an add of the constant -1, since the divisor is a
     // register and not necessarily a compile-time constant.
4031 auto NegOne = Builder.buildConstant(Ty, -1);
4032 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
4033 Builder.buildAnd(DstReg, Src0, Add);
4034 MI.eraseFromParent();
4035}
4036
4038 unsigned &SelectOpNo) const {
     // Matches a binary operator MI where one source is a single-use G_SELECT of
     // two constants and the other source can be folded into both select arms.
     // SelectOpNo is set to the operand index of MI (1 or 2) that is the select.
4039 Register LHS = MI.getOperand(1).getReg();
4040 Register RHS = MI.getOperand(2).getReg();
4041
     // Try the LHS as the select first; fall back to the RHS below.
4042 Register OtherOperandReg = RHS;
4043 SelectOpNo = 1;
4044 MachineInstr *Select = MRI.getVRegDef(LHS);
4045
4046 // Don't do this unless the old select is going away. We want to eliminate the
4047 // binary operator, not replace a binop with a select.
4048 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
4049 !MRI.hasOneNonDBGUse(LHS)) {
4050 OtherOperandReg = LHS;
4051 SelectOpNo = 2;
4052 Select = MRI.getVRegDef(RHS);
4053 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
4054 !MRI.hasOneNonDBGUse(RHS))
4055 return false;
4056 }
4057
     // Operands 2 and 3 of the select are its true and false values.
4058 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
4059 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
4060
     // Both select arms must be (non-opaque) constants or constant vectors.
4061 if (!isConstantOrConstantVector(*SelectLHS, MRI,
4062 /*AllowFP*/ true,
4063 /*AllowOpaqueConstants*/ false))
4064 return false;
4065 if (!isConstantOrConstantVector(*SelectRHS, MRI,
4066 /*AllowFP*/ true,
4067 /*AllowOpaqueConstants*/ false))
4068 return false;
4069
4070 unsigned BinOpcode = MI.getOpcode();
4071
4072 // We know that one of the operands is a select of constants. Now verify that
4073 // the other binary operator operand is either a constant, or we can handle a
4074 // variable.
     // A variable operand is accepted only for G_AND/G_OR when each select arm
     // is all-zeros or all-ones (scalar or splat).
4075 bool CanFoldNonConst =
4076 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
4077 (isNullOrNullSplat(*SelectLHS, MRI) ||
4078 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
4079 (isNullOrNullSplat(*SelectRHS, MRI) ||
4080 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
4081 if (CanFoldNonConst)
4082 return true;
4083
4084 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
4085 /*AllowFP*/ true,
4086 /*AllowOpaqueConstants*/ false);
4087}
4088
4089 /// \p SelectOperand is the operand in binary operator \p MI that is the select
4090 /// to fold.
     ///
     /// Builds the binary operation once per select arm, keeping the select on
     /// the same side of the operator as in \p MI, then builds a new select of
     /// the two results into \p MI's destination and erases \p MI.
4092 MachineInstr &MI, const unsigned &SelectOperand) const {
4093 Register Dst = MI.getOperand(0).getReg();
4094 Register LHS = MI.getOperand(1).getReg();
4095 Register RHS = MI.getOperand(2).getReg();
4096 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
4097
4098 Register SelectCond = Select->getOperand(1).getReg();
4099 Register SelectTrue = Select->getOperand(2).getReg();
4100 Register SelectFalse = Select->getOperand(3).getReg();
4101
4102 LLT Ty = MRI.getType(Dst);
4103 unsigned BinOpcode = MI.getOpcode();
4104
4105 Register FoldTrue, FoldFalse;
4106
4107 // We have a select-of-constants followed by a binary operator with a
4108 // constant. Eliminate the binop by pulling the constant math into the select.
4109 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
4110 if (SelectOperand == 1) {
4111 // TODO: SelectionDAG verifies this actually constant folds before
4112 // committing to the combine.
4113
     // Select was the LHS: the other operand stays on the right-hand side.
4114 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
4115 FoldFalse =
4116 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
4117 } else {
     // Select was the RHS: the other operand stays on the left-hand side, which
     // preserves operand order for non-commutative operators.
4118 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
4119 FoldFalse =
4120 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
4121 }
4122
     // MI's flags are passed on to the new select.
4123 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
4124 MI.eraseFromParent();
4125}
4126
     // Walks the tree of G_ORs rooted at Root and collects every non-G_OR leaf
     // register. Returns std::nullopt when a visited operand has more than one
     // non-debug use, or when the number of leaves is zero or odd.
4127 std::optional<SmallVector<Register, 8>>
4128 CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
4129 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
4130 // We want to detect if Root is part of a tree which represents a bunch
4131 // of loads being merged into a larger load. We'll try to recognize patterns
4132 // like, for example:
4133 //
4134 // Reg Reg
4135 // \ /
4136 // OR_1 Reg
4137 // \ /
4138 // OR_2
4139 // \ Reg
4140 // .. /
4141 // Root
4142 //
4143 // Reg Reg Reg Reg
4144 // \ / \ /
4145 // OR_1 OR_2
4146 // \ /
4147 // \ /
4148 // ...
4149 // Root
4150 //
4151 // Each "Reg" may have been produced by a load + some arithmetic. This
4152 // function will save each of them.
4153 SmallVector<Register, 8> RegsToVisit;
     // NOTE(review): the work list `Ors` used below is not declared in the
     // visible text (embedded listing line 4154 is missing from this extract).
     // Presumably it is seeded with Root -- confirm against the upstream file.
4155
4156 // In the "worst" case, we're dealing with a load for each byte. So, there
4157 // are at most #bytes - 1 ORs.
4158 const unsigned MaxIter =
4159 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
4160 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
4161 if (Ors.empty())
4162 break;
4163 const MachineInstr *Curr = Ors.pop_back_val();
4164 Register OrLHS = Curr->getOperand(1).getReg();
4165 Register OrRHS = Curr->getOperand(2).getReg();
4166
4167 // In the combine, we want to eliminate the entire tree.
4168 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
4169 return std::nullopt;
4170
4171 // If it's a G_OR, save it and continue to walk. If it's not, then it's
4172 // something that may be a load + arithmetic.
4173 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
4174 Ors.push_back(Or);
4175 else
4176 RegsToVisit.push_back(OrLHS);
4177 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
4178 Ors.push_back(Or);
4179 else
4180 RegsToVisit.push_back(OrRHS);
4181 }
4182
4183 // We're going to try and merge each register into a wider power-of-2 type,
4184 // so we ought to have an even number of registers.
4185 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
4186 return std::nullopt;
4187 return RegsToVisit;
4188}
4189
4190 /// Helper function for findLoadOffsetsForLoadOrCombine.
4191 ///
4192 /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
4193 /// and then moving that value into a specific byte offset.
4194 ///
4195 /// e.g. x[i] << 24
4196 ///
     /// A register that is not a single-use shift-left by a constant is treated
     /// as a shift by 0. The shift amount must be a multiple of \p MemSizeInBits,
     /// and the load must be an unordered zero-extending load of exactly
     /// \p MemSizeInBits bits; otherwise std::nullopt is returned.
     ///
4197 /// \returns The load instruction and the byte offset it is moved into.
     /// The offset is computed as Shift / \p MemSizeInBits, i.e. it is expressed
     /// in units of the narrow load width.
4198 static std::optional<std::pair<GZExtLoad *, int64_t>>
4199 matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
4200 const MachineRegisterInfo &MRI) {
4201 assert(MRI.hasOneNonDBGUse(Reg) &&
4202 "Expected Reg to only have one non-debug use?");
4203 Register MaybeLoad;
4204 int64_t Shift;
4205 if (!mi_match(Reg, MRI,
4206 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
     // No shift found: treat Reg itself as the candidate load at position 0.
4207 Shift = 0;
4208 MaybeLoad = Reg;
4209 }
4210
     // The value must land on a boundary of the narrow load width.
4211 if (Shift % MemSizeInBits != 0)
4212 return std::nullopt;
4213
4214 // TODO: Handle other types of loads.
4215 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
4216 if (!Load)
4217 return std::nullopt;
4218
4219 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4220 return std::nullopt;
4221
4222 return std::make_pair(Load, Shift / MemSizeInBits);
4223}
4224
     // For every register in RegsToVisit, finds the narrow load feeding it and
     // the position that load ends up at, checking that all loads share one
     // basic block, address space and base pointer, and that no index or
     // destination position repeats.
     // Returns (load with the lowest index, that index, latest load in
     // instruction order), or std::nullopt if any check fails.
4225 std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4226 CombinerHelper::findLoadOffsetsForLoadOrCombine(
     // NOTE(review): the parameter declaring MemOffset2Idx (used below as a map
     // from destination position to load index) is not in the visible text;
     // embedded listing line 4227 is missing from this extract.
4228 const SmallVector<Register, 8> &RegsToVisit,
4229 const unsigned MemSizeInBits) const {
4230
4231 // Each load found for the pattern. There should be one for each RegsToVisit.
4232 SmallSetVector<const MachineInstr *, 8> Loads;
4233
4234 // The lowest index used in any load. (The lowest "i" for each x[i].)
4235 int64_t LowestIdx = INT64_MAX;
4236
4237 // The load which uses the lowest index.
4238 GZExtLoad *LowestIdxLoad = nullptr;
4239
4240 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4241 SmallSet<int64_t, 8> SeenIdx;
4242
4243 // Ensure each load is in the same MBB.
4244 // TODO: Support multiple MachineBasicBlocks.
4245 MachineBasicBlock *MBB = nullptr;
4246 const MachineMemOperand *MMO = nullptr;
4247
4248 // Earliest instruction-order load in the pattern.
4249 GZExtLoad *EarliestLoad = nullptr;
4250
4251 // Latest instruction-order load in the pattern.
4252 GZExtLoad *LatestLoad = nullptr;
4253
4254 // Base pointer which every load should share.
     // NOTE(review): the declaration of BasePtr is not in the visible text;
     // embedded listing line 4255 is missing from this extract.
4256
4257 // We want to find a load for each register. Each load should have some
4258 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4259 // track of the load which uses the lowest index. Later, we will check if we
4260 // can use its pointer in the final, combined load.
4261 for (auto Reg : RegsToVisit) {
4262 // Find the load, and find the position that it will end up in (e.g. a
4263 // shifted) value.
4264 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4265 if (!LoadAndPos)
4266 return std::nullopt;
4267 GZExtLoad *Load;
4268 int64_t DstPos;
4269 std::tie(Load, DstPos) = *LoadAndPos;
4270
4271 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4272 // it is difficult to check for stores/calls/etc between loads.
     // The first load seen fixes the block every later load must live in.
4273 MachineBasicBlock *LoadMBB = Load->getParent();
4274 if (!MBB)
4275 MBB = LoadMBB;
4276 if (LoadMBB != MBB)
4277 return std::nullopt;
4278
4279 // Make sure that the MachineMemOperands of every seen load are compatible.
     // Only the address space is compared here.
4280 auto &LoadMMO = Load->getMMO();
4281 if (!MMO)
4282 MMO = &LoadMMO;
4283 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4284 return std::nullopt;
4285
4286 // Find out what the base pointer and index for the load is.
     // A pointer that is not "base + constant" is treated as base with index 0.
4287 Register LoadPtr;
4288 int64_t Idx;
4289 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4290 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4291 LoadPtr = Load->getOperand(1).getReg();
4292 Idx = 0;
4293 }
4294
4295 // Don't combine things like a[i], a[i] -> a bigger load.
4296 if (!SeenIdx.insert(Idx).second)
4297 return std::nullopt;
4298
4299 // Every load must share the same base pointer; don't combine things like:
4300 //
4301 // a[i], b[i + 1] -> a bigger load.
4302 if (!BasePtr.isValid())
4303 BasePtr = LoadPtr;
4304 if (BasePtr != LoadPtr)
4305 return std::nullopt;
4306
4307 if (Idx < LowestIdx) {
4308 LowestIdx = Idx;
4309 LowestIdxLoad = Load;
4310 }
4311
4312 // Keep track of the byte offset that this load ends up at. If we have seen
4313 // the byte offset, then stop here. We do not want to combine:
4314 //
4315 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4316 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4317 return std::nullopt;
4318 Loads.insert(Load);
4319
4320 // Keep track of the position of the earliest/latest loads in the pattern.
4321 // We will check that there are no load fold barriers between them later
4322 // on.
4323 //
4324 // FIXME: Is there a better way to check for load fold barriers?
4325 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4326 EarliestLoad = Load;
4327 if (!LatestLoad || dominates(*LatestLoad, *Load))
4328 LatestLoad = Load;
4329 }
4330
4331 // We found a load for each register. Let's check if each load satisfies the
4332 // pattern.
4333 assert(Loads.size() == RegsToVisit.size() &&
4334 "Expected to find a load for each register?");
4335 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4336 LatestLoad && "Expected at least two loads?");
4337
4338 // Check if there are any stores, calls, etc. between any of the loads. If
4339 // there are, then we can't safely perform the combine.
4340 //
4341 // MaxIter is chosen based off the (worst case) number of iterations it
4342 // typically takes to succeed in the LLVM test suite plus some padding.
4343 //
4344 // FIXME: Is there a better way to check for load fold barriers?
4345 const unsigned MaxIter = 20;
4346 unsigned Iter = 0;
4347 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4348 LatestLoad->getIterator())) {
     // Loads that belong to the pattern neither count nor block.
4349 if (Loads.count(&MI))
4350 continue;
4351 if (MI.isLoadFoldBarrier())
4352 return std::nullopt;
     // Give up once more than MaxIter unrelated instructions have been scanned.
4353 if (Iter++ == MaxIter)
4354 return std::nullopt;
4355 }
4356
4357 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4358}
4359
4362 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // Matches a scalar G_OR tree of shifted narrow loads that together form one
     // wide load (optionally byte-swapped). On success MatchInfo is set to a
     // callback that builds the wide load, plus a byte swap when needed.
4363 assert(MI.getOpcode() == TargetOpcode::G_OR);
4364 MachineFunction &MF = *MI.getMF();
4365 // Assuming a little-endian target, transform:
4366 // s8 *a = ...
4367 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4368 // =>
4369 // s32 val = *((i32)a)
4370 //
4371 // s8 *a = ...
4372 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4373 // =>
4374 // s32 val = BSWAP(*((s32)a))
4375 Register Dst = MI.getOperand(0).getReg();
4376 LLT Ty = MRI.getType(Dst);
4377 if (Ty.isVector())
4378 return false;
4379
4380 // We need to combine at least two loads into this type. Since the smallest
4381 // possible load is into a byte, we need at least a 16-bit wide type.
4382 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4383 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4384 return false;
4385
4386 // Match a collection of non-OR instructions in the pattern.
4387 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4388 if (!RegsToVisit)
4389 return false;
4390
4391 // We have a collection of non-OR instructions. Figure out how wide each of
4392 // the small loads should be based off of the number of potential loads we
4393 // found.
4394 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4395 if (NarrowMemSizeInBits % 8 != 0)
4396 return false;
4397
4398 // Check if each register feeding into each OR is a load from the same
4399 // base pointer + some arithmetic.
4400 //
4401 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4402 //
4403 // Also verify that each of these ends up putting a[i] into the same memory
4404 // offset as a load into a wide type would.
     // NOTE(review): the declaration of MemOffset2Idx is not in the visible
     // text; embedded listing line 4405 is missing from this extract.
4406 GZExtLoad *LowestIdxLoad, *LatestLoad;
4407 int64_t LowestIdx;
4408 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4409 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4410 if (!MaybeLoadInfo)
4411 return false;
4412 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4413
4414 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4415 // we found before, check if this corresponds to a big or little endian byte
4416 // pattern. If it does, then we can represent it using a load + possibly a
4417 // BSWAP.
4418 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4419 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4420 if (!IsBigEndian)
4421 return false;
     // A swap is needed when the pattern's byte order differs from the target's.
4422 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4423 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4424 return false;
4425
4426 // Make sure that the load from the lowest index produces offset 0 in the
4427 // final value.
4428 //
4429 // This ensures that we won't combine something like this:
4430 //
4431 // load x[i] -> byte 2
4432 // load x[i+1] -> byte 0 ---> wide_load x[i]
4433 // load x[i+2] -> byte 1
4434 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4435 const unsigned ZeroByteOffset =
4436 *IsBigEndian
4437 ? bigEndianByteAt(NumLoadsInTy, 0)
4438 : littleEndianByteAt(NumLoadsInTy, 0);
4439 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4440 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4441 ZeroOffsetIdx->second != LowestIdx)
4442 return false;
4443
4444 // We will reuse the pointer from the load which ends up at byte offset 0. It
4445 // may not use index 0.
4446 Register Ptr = LowestIdxLoad->getPointerReg();
4447 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
     // Describe the wide access for the legality query: same memory operand as
     // the lowest-index load, but with the memory type widened to Ty.
4448 LegalityQuery::MemDesc MMDesc(MMO);
4449 MMDesc.MemoryTy = Ty;
     // NOTE(review): the opening of the condition that the next line completes
     // is not in the visible text (embedded listing line 4450 is missing).
     // Presumably it is a legality check of the wide G_LOAD -- confirm upstream.
4451 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4452 return false;
4453 auto PtrInfo = MMO.getPointerInfo();
4454 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4455
4456 // Load must be allowed and fast on the target.
     // NOTE(review): `C` used below is not declared in the visible text
     // (embedded listing line 4457 is missing from this extract).
4458 auto &DL = MF.getDataLayout();
4459 unsigned Fast = 0;
4460 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4461 !Fast)
4462 return false;
4463
     // The callback captures by copy and inserts the new instructions at the
     // latest load of the pattern.
4464 MatchInfo = [=](MachineIRBuilder &MIB) {
4465 MIB.setInstrAndDebugLoc(*LatestLoad);
     // With a byte swap, load into a fresh register and swap into Dst.
4466 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4467 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4468 if (NeedsBSwap)
4469 MIB.buildBSwap(Dst, LoadDst);
4470 };
4471 return true;
4472}
4473
4475 MachineInstr *&ExtMI) const {
     // Matches a scalar phi whose only non-debug user is an extend, and whose
     // incoming values come from a small set of instructions that are likely to
     // absorb the extend. On a match, ExtMI points at the extend instruction.
4476 auto &PHI = cast<GPhi>(MI);
4477 Register DstReg = PHI.getReg(0);
4478
4479 // TODO: Extending a vector may be expensive, don't do this until heuristics
4480 // are better.
4481 if (MRI.getType(DstReg).isVector())
4482 return false;
4483
4484 // Try to match a phi, whose only use is an extend.
4485 if (!MRI.hasOneNonDBGUse(DstReg))
4486 return false;
     // ExtMI is assigned before the opcode check, so it is also written on the
     // paths that end up returning false.
4487 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4488 switch (ExtMI->getOpcode()) {
4489 case TargetOpcode::G_ANYEXT:
4490 return true; // G_ANYEXT is usually free.
4491 case TargetOpcode::G_ZEXT:
4492 case TargetOpcode::G_SEXT:
4493 break;
4494 default:
4495 return false;
4496 }
4497
4498 // If the target is likely to fold this extend away, don't propagate.
4499 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4500 return false;
4501
4502 // We don't want to propagate the extends unless there's a good chance that
4503 // they'll be optimized in some way.
4504 // Collect the unique incoming values.
     // NOTE(review): the declaration of InSrcs is not in the visible text
     // (embedded listing line 4505 is missing from this extract). It is used as
     // a set of defining instructions below.
4506 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4507 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4508 switch (DefMI->getOpcode()) {
4509 case TargetOpcode::G_LOAD:
4510 case TargetOpcode::G_TRUNC:
4511 case TargetOpcode::G_SEXT:
4512 case TargetOpcode::G_ZEXT:
4513 case TargetOpcode::G_ANYEXT:
4514 case TargetOpcode::G_CONSTANT:
4515 InSrcs.insert(DefMI);
4516 // Don't try to propagate if there are too many places to create new
4517 // extends, chances are it'll increase code size.
4518 if (InSrcs.size() > 2)
4519 return false;
4520 break;
4521 default:
     // Any other kind of incoming definition rejects the match.
4522 return false;
4523 }
4524 }
4525 return true;
4526}
4527
4529 MachineInstr *&ExtMI) const {
     // Builds one extend (same opcode as ExtMI) after the defining instruction
     // of each distinct incoming value, then builds a new G_PHI of the extended
     // values that defines ExtMI's result, and erases ExtMI.
4530 auto &PHI = cast<GPhi>(MI);
4531 Register DstReg = ExtMI->getOperand(0).getReg();
4532 LLT ExtTy = MRI.getType(DstReg);
4533
4534 // Propagate the extension into the block of each incoming reg's block.
4535 // Use a SetVector here because PHIs can have duplicate edges, and we want
4536 // deterministic iteration order.
     // NOTE(review): the declarations of SrcMIs and OldToNewSrcMap are not in
     // the visible text (embedded listing lines 4537-4538 are missing from this
     // extract).
4539 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4540 auto SrcReg = PHI.getIncomingValue(I);
4541 auto *SrcMI = MRI.getVRegDef(SrcReg);
     // Skip definitions that already received an extend.
4542 if (!SrcMIs.insert(SrcMI))
4543 continue;
4544
4545 // Build an extend after each src inst.
4546 auto *MBB = SrcMI->getParent();
4547 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
     // If the next instruction is a phi, move the insertion point past the
     // block's phis.
4548 if (InsertPt != MBB->end() && InsertPt->isPHI())
4549 InsertPt = MBB->getFirstNonPHI();
4550
4551 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4552 Builder.setDebugLoc(MI.getDebugLoc());
4553 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4554 OldToNewSrcMap[SrcMI] = NewExt;
4555 }
4556
4557 // Create a new phi with the extended inputs.
4558 Builder.setInstrAndDebugLoc(MI);
4559 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4560 NewPhi.addDef(DstReg);
     // Walk MI's operands after the def: non-register operands are copied as
     // basic-block operands, register operands are swapped for their extends.
4561 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4562 if (!MO.isReg()) {
4563 NewPhi.addMBB(MO.getMBB());
4564 continue;
4565 }
4566 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4567 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4568 }
4569 Builder.insertInstr(NewPhi);
4570 ExtMI->eraseFromParent();
4571}
4572
4574 Register &Reg) const {
     // Matches a G_EXTRACT_VECTOR_ELT with an in-range constant index whose
     // vector source is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC, optionally
     // behind a G_TRUNC. On success Reg is the build-vector source operand that
     // the index selects.
4575 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4576 // If we have a constant index, look for a G_BUILD_VECTOR source
4577 // and find the source register that the index maps to.
4578 Register SrcVec = MI.getOperand(1).getReg();
4579 LLT SrcTy = MRI.getType(SrcVec);
4580 if (SrcTy.isScalableVector())
4581 return false;
4582
4583 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
     // Reject non-constant and out-of-range indices.
4584 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4585 return false;
4586
4587 unsigned VecIdx = Cst->Value.getZExtValue();
4588
4589 // Check if we have a build_vector or build_vector_trunc with an optional
4590 // trunc in front.
4591 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4592 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4593 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4594 }
4595
4596 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4597 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4598 return false;
4599
     // A multi-use source vector is only accepted when the target opts in.
4600 EVT Ty(getMVTForLLT(SrcTy));
4601 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4602 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4603 return false;
4604
     // Operand 0 is the def, so element VecIdx is operand VecIdx + 1.
4605 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4606 return true;
4607}
4608
4610 Register &Reg) const {
     // Replaces MI's result with Reg. If Reg's type differs from the result
     // type, a truncate of Reg is built into the destination and MI is erased.
4611 // Check the type of the register, since it may have come from a
4612 // G_BUILD_VECTOR_TRUNC.
4613 LLT ScalarTy = MRI.getType(Reg);
4614 Register DstReg = MI.getOperand(0).getReg();
4615 LLT DstTy = MRI.getType(DstReg);
4616
4617 if (ScalarTy != DstTy) {
4618 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4619 Builder.buildTrunc(DstReg, Reg);
4620 MI.eraseFromParent();
4621 return;
4622 }
     // NOTE(review): no statement is visible for the equal-type path; embedded
     // listing line 4623 is missing from this extract. Presumably it replaces
     // MI with Reg directly -- confirm against the upstream file.
4624}
4625
4628 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
     // On success SrcDstPairs holds one (build_vector source register, extract
     // instruction) pair per non-debug user of the G_BUILD_VECTOR. Pairs are
     // appended as users are visited, so the vector may be partially filled
     // when the match fails.
4629 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4630 // This combine tries to find build_vector's which have every source element
4631 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4632 // the masked load scalarization is run late in the pipeline. There's already
4633 // a combine for a similar pattern starting from the extract, but that
4634 // doesn't attempt to do it if there are multiple uses of the build_vector,
4635 // which in this case is true. Starting the combine from the build_vector
4636 // feels more natural than trying to find sibling nodes of extracts.
4637 // E.g.
4638 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4639 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4640 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4641 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4642 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4643 // ==>
4644 // replace ext{1,2,3,4} with %s{1,2,3,4}
4645
4646 Register DstReg = MI.getOperand(0).getReg();
4647 LLT DstTy = MRI.getType(DstReg);
4648 unsigned NumElts = DstTy.getNumElements();
4649
     // One bit per vector element, set when some user extracts that element.
4650 SmallBitVector ExtractedElts(NumElts);
4651 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
     // Every user must be an extract with a constant index.
4652 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4653 return false;
4654 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4655 if (!Cst)
4656 return false;
4657 unsigned Idx = Cst->getZExtValue();
4658 if (Idx >= NumElts)
4659 return false; // Out of range.
4660 ExtractedElts.set(Idx);
     // Operand 0 of MI is the def, so element Idx is operand Idx + 1.
4661 SrcDstPairs.emplace_back(
4662 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4663 }
4664 // Match if every element was extracted.
4665 return ExtractedElts.all();
4666}
4667
4670 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
     // For each (source register, extract instruction) pair, replaces the
     // extract's result with the source register and erases the extract; then
     // erases the G_BUILD_VECTOR itself.
4671 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4672 for (auto &Pair : SrcDstPairs) {
4673 auto *ExtMI = Pair.second;
4674 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4675 ExtMI->eraseFromParent();
4676 }
4677 MI.eraseFromParent();
4678}
4679
4682 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // Runs the build callback via applyBuildFnNoErase, then erases MI.
4683 applyBuildFnNoErase(MI, MatchInfo);
4684 MI.eraseFromParent();
4685}
4686
4689 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // Invokes the callback stored in MatchInfo with this helper's Builder.
     // Nothing is erased here.
4690 MatchInfo(Builder);
4691}
4692
4694 bool AllowScalarConstants,
4695 BuildFnTy &MatchInfo) const {
     // Matches a G_OR of a shift-left and a logical shift-right that together
     // form a funnel shift. On success MatchInfo is set to a callback that
     // builds the G_FSHL/G_FSHR into MI's destination register.
4696 assert(MI.getOpcode() == TargetOpcode::G_OR);
4697
4698 Register Dst = MI.getOperand(0).getReg();
4699 LLT Ty = MRI.getType(Dst);
4700 unsigned BitWidth = Ty.getScalarSizeInBits();
4701
4702 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4703 unsigned FshOpc = 0;
4704
4705 // Match (or (shl ...), (lshr ...)).
4706 if (!mi_match(Dst, MRI,
4707 // m_GOr() handles the commuted version as well.
4708 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4709 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4710 return false;
4711
4712 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4713 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
     // CstShlAmt stays 0 unless the shl amount is a constant or splat; the
     // legality fallback below relies on that.
4714 int64_t CstShlAmt = 0, CstLShrAmt;
4715 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4716 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4717 CstShlAmt + CstLShrAmt == BitWidth) {
     // Constant case: the fshr form with the lshr amount is chosen.
4718 FshOpc = TargetOpcode::G_FSHR;
4719 Amt = LShrAmt;
4720 } else if (mi_match(LShrAmt, MRI,
     // NOTE(review): the pattern argument of this mi_match is not in the
     // visible text (embedded listing line 4721 is missing from this extract).
4722 ShlAmt == Amt) {
4723 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4724 FshOpc = TargetOpcode::G_FSHL;
4725 } else if (mi_match(ShlAmt, MRI,
     // NOTE(review): the pattern argument of this mi_match is not in the
     // visible text (embedded listing line 4726 is missing from this extract).
4727 LShrAmt == Amt) {
4728 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4729 FshOpc = TargetOpcode::G_FSHR;
4730 } else {
4731 return false;
4732 }
4733
4734 LLT AmtTy = MRI.getType(Amt);
     // Reject only when the funnel shift is not legal (or before the legalizer)
     // AND the scalar-constant escape hatch does not apply. The escape hatch
     // needs AllowScalarConstants, a non-zero constant shl amount and a scalar
     // type.
4735 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
4736 (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
4737 return false;
4738
4739 MatchInfo = [=](MachineIRBuilder &B) {
4740 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4741 };
4742 return true;
4743}
4744
4745/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4747 unsigned Opc = MI.getOpcode();
4748 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4749 Register X = MI.getOperand(1).getReg();
4750 Register Y = MI.getOperand(2).getReg();
4751 if (X != Y)
4752 return false;
4753 unsigned RotateOpc =
4754 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4755 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4756}
4757
4759 unsigned Opc = MI.getOpcode();
4760 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4761 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4762 Observer.changingInstr(MI);
4763 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4764 : TargetOpcode::G_ROTR));
4765 MI.removeOperand(2);
4766 Observer.changedInstr(MI);
4767}
4768
4769// Fold (rot x, c) -> (rot x, c % BitSize)
4771 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4772 MI.getOpcode() == TargetOpcode::G_ROTR);
4773 unsigned Bitsize =
4774 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4775 Register AmtReg = MI.getOperand(2).getReg();
4776 bool OutOfRange = false;
4777 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4778 if (auto *CI = dyn_cast<ConstantInt>(C))
4779 OutOfRange |= CI->getValue().uge(Bitsize);
4780 return true;
4781 };
4782 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4783}
4784
4786 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4787 MI.getOpcode() == TargetOpcode::G_ROTR);
4788 unsigned Bitsize =
4789 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4790 Register Amt = MI.getOperand(2).getReg();
4791 LLT AmtTy = MRI.getType(Amt);
4792 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4793 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4794 Observer.changingInstr(MI);
4795 MI.getOperand(2).setReg(Amt);
4796 Observer.changedInstr(MI);
4797}
4798
4800 int64_t &MatchInfo) const {
4801 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4802 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4803
4804 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4805 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4806 // KnownBits on the LHS in two cases:
4807 //
4808 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4809 // we cannot do any transforms so we can safely bail out early.
4810 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4811 // >=0.
4812 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4813 if (KnownRHS.isUnknown())
4814 return false;
4815
4816 std::optional<bool> KnownVal;
4817 if (KnownRHS.isZero()) {
4818 // ? uge 0 -> always true
4819 // ? ult 0 -> always false
4820 if (Pred == CmpInst::ICMP_UGE)
4821 KnownVal = true;
4822 else if (Pred == CmpInst::ICMP_ULT)
4823 KnownVal = false;
4824 }
4825
4826 if (!KnownVal) {
4827 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4828 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4829 }
4830
4831 if (!KnownVal)
4832 return false;
4833 MatchInfo =
4834 *KnownVal
4836 /*IsVector = */
4837 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4838 /* IsFP = */ false)
4839 : 0;
4840 return true;
4841}
4842
4845 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4846 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4847 // Given:
4848 //
4849 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4850 // %cmp = G_ICMP ne %x, 0
4851 //
4852 // Or:
4853 //
4854 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4855 // %cmp = G_ICMP eq %x, 1
4856 //
4857 // We can replace %cmp with %x assuming true is 1 on the target.
4858 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4859 if (!CmpInst::isEquality(Pred))
4860 return false;
4861 Register Dst = MI.getOperand(0).getReg();
4862 LLT DstTy = MRI.getType(Dst);
4864 /* IsFP = */ false) != 1)
4865 return false;
4866 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4867 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4868 return false;
4869 Register LHS = MI.getOperand(2).getReg();
4870 auto KnownLHS = VT->getKnownBits(LHS);
4871 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4872 return false;
4873 // Make sure replacing Dst with the LHS is a legal operation.
4874 LLT LHSTy = MRI.getType(LHS);
4875 unsigned LHSSize = LHSTy.getSizeInBits();
4876 unsigned DstSize = DstTy.getSizeInBits();
4877 unsigned Op = TargetOpcode::COPY;
4878 if (DstSize != LHSSize)
4879 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4880 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4881 return false;
4882 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4883 return true;
4884}
4885
4886// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4889 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4890 assert(MI.getOpcode() == TargetOpcode::G_AND);
4891
4892 // Ignore vector types to simplify matching the two constants.
4893 // TODO: do this for vectors and scalars via a demanded bits analysis.
4894 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4895 if (Ty.isVector())
4896 return false;
4897
4898 Register Src;
4899 Register AndMaskReg;
4900 int64_t AndMaskBits;
4901 int64_t OrMaskBits;
4902 if (!mi_match(MI, MRI,
4903 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4904 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4905 return false;
4906
4907 // Check if OrMask could turn on any bits in Src.
4908 if (AndMaskBits & OrMaskBits)
4909 return false;
4910
4911 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4912 Observer.changingInstr(MI);
4913 // Canonicalize the result to have the constant on the RHS.
4914 if (MI.getOperand(1).getReg() == AndMaskReg)
4915 MI.getOperand(2).setReg(AndMaskReg);
4916 MI.getOperand(1).setReg(Src);
4917 Observer.changedInstr(MI);
4918 };
4919 return true;
4920}
4921
4922/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4925 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4926 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4927 Register Dst = MI.getOperand(0).getReg();
4928 Register Src = MI.getOperand(1).getReg();
4929 LLT Ty = MRI.getType(Src);
4931 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4932 return false;
4933 int64_t Width = MI.getOperand(2).getImm();
4934 Register ShiftSrc;
4935 int64_t ShiftImm;
4936 if (!mi_match(
4937 Src, MRI,
4938 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4939 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4940 return false;
4941 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4942 return false;
4943
4944 MatchInfo = [=](MachineIRBuilder &B) {
4945 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4946 auto Cst2 = B.buildConstant(ExtractTy, Width);
4947 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4948 };
4949 return true;
4950}
4951
4952/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4954 BuildFnTy &MatchInfo) const {
4955 GAnd *And = cast<GAnd>(&MI);
4956 Register Dst = And->getReg(0);
4957 LLT Ty = MRI.getType(Dst);
4959 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4960 // into account.
4961 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4962 return false;
4963
4964 int64_t AndImm, LSBImm;
4965 Register ShiftSrc;
4966 const unsigned Size = Ty.getScalarSizeInBits();
4967 if (!mi_match(And->getReg(0), MRI,
4968 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4969 m_ICst(AndImm))))
4970 return false;
4971
4972 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4973 auto MaybeMask = static_cast<uint64_t>(AndImm);
4974 if (MaybeMask & (MaybeMask + 1))
4975 return false;
4976
4977 // LSB must fit within the register.
4978 if (static_cast<uint64_t>(LSBImm) >= Size)
4979 return false;
4980
4981 uint64_t Width = APInt(Size, AndImm).countr_one();
4982 MatchInfo = [=](MachineIRBuilder &B) {
4983 auto WidthCst = B.buildConstant(ExtractTy, Width);
4984 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4985 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4986 };
4987 return true;
4988}
4989
4992 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4993 const unsigned Opcode = MI.getOpcode();
4994 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4995
4996 const Register Dst = MI.getOperand(0).getReg();
4997
4998 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4999 ? TargetOpcode::G_SBFX
5000 : TargetOpcode::G_UBFX;
5001
5002 // Check if the type we would use for the extract is legal
5003 LLT Ty = MRI.getType(Dst);
5005 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
5006 return false;
5007
5008 Register ShlSrc;
5009 int64_t ShrAmt;
5010 int64_t ShlAmt;
5011 const unsigned Size = Ty.getScalarSizeInBits();
5012
5013 // Try to match shr (shl x, c1), c2
5014 if (!mi_match(Dst, MRI,
5015 m_BinOp(Opcode,
5016 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
5017 m_ICst(ShrAmt))))
5018 return false;
5019
5020 // Make sure that the shift sizes can fit a bitfield extract
5021 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
5022 return false;
5023
5024 // Skip this combine if the G_SEXT_INREG combine could handle it
5025 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
5026 return false;
5027
5028 // Calculate start position and width of the extract
5029 const int64_t Pos = ShrAmt - ShlAmt;
5030 const int64_t Width = Size - ShrAmt;
5031
5032 MatchInfo = [=](MachineIRBuilder &B) {
5033 auto WidthCst = B.buildConstant(ExtractTy, Width);
5034 auto PosCst = B.buildConstant(ExtractTy, Pos);
5035 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
5036 };
5037 return true;
5038}
5039
5042 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5043 const unsigned Opcode = MI.getOpcode();
5044 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
5045
5046 const Register Dst = MI.getOperand(0).getReg();
5047 LLT Ty = MRI.getType(Dst);
5049 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
5050 return false;
5051
5052 // Try to match shr (and x, c1), c2
5053 Register AndSrc;
5054 int64_t ShrAmt;
5055 int64_t SMask;
5056 if (!mi_match(Dst, MRI,
5057 m_BinOp(Opcode,
5058 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
5059 m_ICst(ShrAmt))))
5060 return false;
5061
5062 const unsigned Size = Ty.getScalarSizeInBits();
5063 if (ShrAmt < 0 || ShrAmt >= Size)
5064 return false;
5065
5066 // If the shift subsumes the mask, emit the 0 directly.
5067 if (0 == (SMask >> ShrAmt)) {
5068 MatchInfo = [=](MachineIRBuilder &B) {
5069 B.buildConstant(Dst, 0);
5070 };
5071 return true;
5072 }
5073
5074 // Check that ubfx can do the extraction, with no holes in the mask.
5075 uint64_t UMask = SMask;
5076 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
5078 if (!isMask_64(UMask))
5079 return false;
5080
5081 // Calculate start position and width of the extract.
5082 const int64_t Pos = ShrAmt;
5083 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
5084
5085 // It's preferable to keep the shift, rather than form G_SBFX.
5086 // TODO: remove the G_AND via demanded bits analysis.
5087 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
5088 return false;
5089
5090 MatchInfo = [=](MachineIRBuilder &B) {
5091 auto WidthCst = B.buildConstant(ExtractTy, Width);
5092 auto PosCst = B.buildConstant(ExtractTy, Pos);
5093 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
5094 };
5095 return true;
5096}
5097
5098bool CombinerHelper::reassociationCanBreakAddressingModePattern(
5099 MachineInstr &MI) const {
5100 auto &PtrAdd = cast<GPtrAdd>(MI);
5101
5102 Register Src1Reg = PtrAdd.getBaseReg();
5103 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
5104 if (!Src1Def)
5105 return false;
5106
5107 Register Src2Reg = PtrAdd.getOffsetReg();
5108
5109 if (MRI.hasOneNonDBGUse(Src1Reg))
5110 return false;
5111
5112 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
5113 if (!C1)
5114 return false;
5115 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5116 if (!C2)
5117 return false;
5118
5119 const APInt &C1APIntVal = *C1;
5120 const APInt &C2APIntVal = *C2;
5121 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
5122
5123 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
5124 // This combine may end up running before ptrtoint/inttoptr combines
5125 // manage to eliminate redundant conversions, so try to look through them.
5126 MachineInstr *ConvUseMI = &UseMI;
5127 unsigned ConvUseOpc = ConvUseMI->getOpcode();
5128 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
5129 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
5130 Register DefReg = ConvUseMI->getOperand(0).getReg();
5131 if (!MRI.hasOneNonDBGUse(DefReg))
5132 break;
5133 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
5134 ConvUseOpc = ConvUseMI->getOpcode();
5135 }
5136 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
5137 if (!LdStMI)
5138 continue;
5139 // Is x[offset2] already not a legal addressing mode? If so then
5140 // reassociating the constants breaks nothing (we test offset2 because
5141 // that's the one we hope to fold into the load or store).
5142 TargetLoweringBase::AddrMode AM;
5143 AM.HasBaseReg = true;
5144 AM.BaseOffs = C2APIntVal.getSExtValue();
5145 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
5146 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
5147 PtrAdd.getMF()->getFunction().getContext());
5148 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
5149 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5150 AccessTy, AS))
5151 continue;
5152
5153 // Would x[offset1+offset2] still be a legal addressing mode?
5154 AM.BaseOffs = CombinedValue;
5155 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5156 AccessTy, AS))
5157 return true;
5158 }
5159
5160 return false;
5161}
5162
5164 MachineInstr *RHS,
5165 BuildFnTy &MatchInfo) const {
5166 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5167 Register Src1Reg = MI.getOperand(1).getReg();
5168 if (RHS->getOpcode() != TargetOpcode::G_ADD)
5169 return false;
5170 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
5171 if (!C2)
5172 return false;
5173
5174 // If both additions are nuw, the reassociated additions are also nuw.
5175 // If the original G_PTR_ADD is additionally nusw, X and C are both not
5176 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
5177 // therefore also nusw.
5178 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
5179 // the new G_PTR_ADDs are then also inbounds.
5180 unsigned PtrAddFlags = MI.getFlags();
5181 unsigned AddFlags = RHS->getFlags();
5182 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
5183 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
5184 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
5185 unsigned Flags = 0;
5186 if (IsNoUWrap)
5188 if (IsNoUSWrap)
5190 if (IsInBounds)
5192
5193 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5194 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
5195
5196 auto NewBase =
5197 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
5198 Observer.changingInstr(MI);
5199 MI.getOperand(1).setReg(NewBase.getReg(0));
5200 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
5201 MI.setFlags(Flags);
5202 Observer.changedInstr(MI);
5203 };
5204 return !reassociationCanBreakAddressingModePattern(MI);
5205}
5206
5208 MachineInstr *LHS,
5209 MachineInstr *RHS,
5210 BuildFnTy &MatchInfo) const {
5211 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
5212 // if and only if (G_PTR_ADD X, C) has one use.
5213 Register LHSBase;
5214 std::optional<ValueAndVReg> LHSCstOff;
5215 if (!mi_match(MI.getBaseReg(), MRI,
5216 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
5217 return false;
5218
5219 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5220
5221 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5222 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5223 // so the new G_PTR_ADDs are also inbounds.
5224 unsigned PtrAddFlags = MI.getFlags();
5225 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5226 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5227 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5229 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5231 unsigned Flags = 0;
5232 if (IsNoUWrap)
5234 if (IsNoUSWrap)
5236 if (IsInBounds)
5238
5239 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5240 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5241 // before its def. Sink the instruction so the outer PTR_ADD to ensure this
5242 // doesn't happen.
5243 LHSPtrAdd->moveBefore(&MI);
5244 Register RHSReg = MI.getOffsetReg();
5245 // set VReg will cause type mismatch if it comes from extend/trunc
5246 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
5247 Observer.changingInstr(MI);
5248 MI.getOperand(2).setReg(NewCst.getReg(0));
5249 MI.setFlags(Flags);
5250 Observer.changedInstr(MI);
5251 Observer.changingInstr(*LHSPtrAdd);
5252 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5253 LHSPtrAdd->setFlags(Flags);
5254 Observer.changedInstr(*LHSPtrAdd);
5255 };
5256 return !reassociationCanBreakAddressingModePattern(MI);
5257}
5258
5260 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
5261 BuildFnTy &MatchInfo) const {
5262 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5263 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5264 if (!LHSPtrAdd)
5265 return false;
5266
5267 Register Src2Reg = MI.getOperand(2).getReg();
5268 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5269 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5270 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5271 if (!C1)
5272 return false;
5273 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5274 if (!C2)
5275 return false;
5276
5277 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5278 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5279 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5280 // largest signed integer that fits into the index type, which is the maximum
5281 // size of allocated objects according to the IR Language Reference.
5282 unsigned PtrAddFlags = MI.getFlags();
5283 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5284 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5285 bool IsInBounds =
5286 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5287 unsigned Flags = 0;
5288 if (IsNoUWrap)
5290 if (IsInBounds) {
5293 }
5294
5295 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5296 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5297 Observer.changingInstr(MI);
5298 MI.getOperand(1).setReg(LHSSrc1);
5299 MI.getOperand(2).setReg(NewCst.getReg(0));
5300 MI.setFlags(Flags);
5301 Observer.changedInstr(MI);
5302 };
5303 return !reassociationCanBreakAddressingModePattern(MI);
5304}
5305
5307 BuildFnTy &MatchInfo) const {
5308 auto &PtrAdd = cast<GPtrAdd>(MI);
5309 // We're trying to match a few pointer computation patterns here for
5310 // re-association opportunities.
5311 // 1) Isolating a constant operand to be on the RHS, e.g.:
5312 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5313 //
5314 // 2) Folding two constants in each sub-tree as long as such folding
5315 // doesn't break a legal addressing mode.
5316 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5317 //
5318 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5319 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
5320 // iif (G_PTR_ADD X, C) has one use.
5321 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5322 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5323
5324 // Try to match example 2.
5325 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5326 return true;
5327
5328 // Try to match example 3.
5329 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5330 return true;
5331
5332 // Try to match example 1.
5333 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5334 return true;
5335
5336 return false;
5337}
5339 Register OpLHS, Register OpRHS,
5340 BuildFnTy &MatchInfo) const {
5341 LLT OpRHSTy = MRI.getType(OpRHS);
5342 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5343
5344 if (OpLHSDef->getOpcode() != Opc)
5345 return false;
5346
5347 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5348 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5349 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5350
5351 // If the inner op is (X op C), pull the constant out so it can be folded with
5352 // other constants in the expression tree. Folding is not guaranteed so we
5353 // might have (C1 op C2). In that case do not pull a constant out because it
5354 // won't help and can lead to infinite loops.
5355 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5356 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5357 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5358 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5359 MatchInfo = [=](MachineIRBuilder &B) {
5360 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5361 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5362 };
5363 return true;
5364 }
5365 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5366 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5367 // iff (op x, c1) has one use
5368 MatchInfo = [=](MachineIRBuilder &B) {
5369 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5370 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5371 };
5372 return true;
5373 }
5374 }
5375
5376 return false;
5377}
5378
5380 BuildFnTy &MatchInfo) const {
5381 // We don't check if the reassociation will break a legal addressing mode
5382 // here since pointer arithmetic is handled by G_PTR_ADD.
5383 unsigned Opc = MI.getOpcode();
5384 Register DstReg = MI.getOperand(0).getReg();
5385 Register LHSReg = MI.getOperand(1).getReg();
5386 Register RHSReg = MI.getOperand(2).getReg();
5387
5388 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5389 return true;
5390 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5391 return true;
5392 return false;
5393}
5394
5396 APInt &MatchInfo) const {
5397 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5398 Register SrcOp = MI.getOperand(1).getReg();
5399
5400 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5401 MatchInfo = *MaybeCst;
5402 return true;
5403 }
5404
5405 return false;
5406}
5407
5409 BuildFnTy &MatchInfo) const {
5410 Register Dst = MI.getOperand(0).getReg();
5411 auto Csts = ConstantFoldUnaryIntOp(MI.getOpcode(), MRI.getType(Dst),
5412 MI.getOperand(1).getReg(), MRI);
5413 if (Csts.empty())
5414 return false;
5415
5416 MatchInfo = [Dst, Csts = std::move(Csts)](MachineIRBuilder &B) {
5417 if (Csts.size() == 1)
5418 B.buildConstant(Dst, Csts[0]);
5419 else
5420 B.buildBuildVectorConstant(Dst, Csts);
5421 };
5422 return true;
5423}
5424
5426 APInt &MatchInfo) const {
5427 Register Op1 = MI.getOperand(1).getReg();
5428 Register Op2 = MI.getOperand(2).getReg();
5429 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5430 if (!MaybeCst)
5431 return false;
5432 MatchInfo = *MaybeCst;
5433 return true;
5434}
5435
5437 ConstantFP *&MatchInfo) const {
5438 Register Op1 = MI.getOperand(1).getReg();
5439 Register Op2 = MI.getOperand(2).getReg();
5440 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5441 if (!MaybeCst)
5442 return false;
5443 MatchInfo =
5444 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5445 return true;
5446}
5447
5449 ConstantFP *&MatchInfo) const {
5450 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5451 MI.getOpcode() == TargetOpcode::G_FMAD);
5452 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5453
5454 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5455 if (!Op3Cst)
5456 return false;
5457
5458 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5459 if (!Op2Cst)
5460 return false;
5461
5462 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5463 if (!Op1Cst)
5464 return false;
5465
5466 APFloat Op1F = Op1Cst->getValueAPF();
5467 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5469 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5470 return true;
5471}
5472
5475 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5476 // Look for a binop feeding into an AND with a mask:
5477 //
5478 // %add = G_ADD %lhs, %rhs
5479 // %and = G_AND %add, 000...11111111
5480 //
5481 // Check if it's possible to perform the binop at a narrower width and zext
5482 // back to the original width like so:
5483 //
5484 // %narrow_lhs = G_TRUNC %lhs
5485 // %narrow_rhs = G_TRUNC %rhs
5486 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5487 // %new_add = G_ZEXT %narrow_add
5488 // %and = G_AND %new_add, 000...11111111
5489 //
5490 // This can allow later combines to eliminate the G_AND if it turns out
5491 // that the mask is irrelevant.
5492 assert(MI.getOpcode() == TargetOpcode::G_AND);
5493 Register Dst = MI.getOperand(0).getReg();
5494 Register AndLHS = MI.getOperand(1).getReg();
5495 Register AndRHS = MI.getOperand(2).getReg();
5496 LLT WideTy = MRI.getType(Dst);
5497
5498 // If the potential binop has more than one use, then it's possible that one
5499 // of those uses will need its full width.
5500 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5501 return false;
5502
5503 // Check if the LHS feeding the AND is impacted by the high bits that we're
5504 // masking out.
5505 //
5506 // e.g. for 64-bit x, y:
5507 //
5508 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5509 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5510 if (!LHSInst)
5511 return false;
5512 unsigned LHSOpc = LHSInst->getOpcode();
5513 switch (LHSOpc) {
5514 default:
5515 return false;
5516 case TargetOpcode::G_ADD:
5517 case TargetOpcode::G_SUB:
5518 case TargetOpcode::G_MUL:
5519 case TargetOpcode::G_AND:
5520 case TargetOpcode::G_OR:
5521 case TargetOpcode::G_XOR:
5522 break;
5523 }
5524
5525 // Find the mask on the RHS.
5526 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5527 if (!Cst)
5528 return false;
5529 auto Mask = Cst->Value;
5530 if (!Mask.isMask())
5531 return false;
5532
5533 // No point in combining if there's nothing to truncate.
5534 unsigned NarrowWidth = Mask.countr_one();
5535 if (NarrowWidth == WideTy.getSizeInBits())
5536 return false;
5537 LLT NarrowTy = LLT::integer(NarrowWidth);
5538
5539 // Check if adding the zext + truncates could be harmful.
5540 auto &MF = *MI.getMF();
5541 const auto &TLI = getTargetLowering();
5542 LLVMContext &Ctx = MF.getFunction().getContext();
5543 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5544 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5545 return false;
5546 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5547 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5548 return false;
5549 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5550 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5551 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5552 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5553 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5554 auto NarrowBinOp =
5555 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5556 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5557 Observer.changingInstr(MI);
5558 MI.getOperand(1).setReg(Ext.getReg(0));
5559 Observer.changedInstr(MI);
5560 };
5561 return true;
5562}
5563
5565 BuildFnTy &MatchInfo) const {
5566 unsigned Opc = MI.getOpcode();
5567 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5568
5569 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5570 return false;
5571
5572 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5573 Observer.changingInstr(MI);
5574 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5575 : TargetOpcode::G_SADDO;
5576 MI.setDesc(Builder.getTII().get(NewOpc));
5577 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5578 Observer.changedInstr(MI);
5579 };
5580 return true;
5581}
5582
5584 BuildFnTy &MatchInfo) const {
5585 // (G_*MULO x, 0) -> 0 + no carry out
5586 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5587 MI.getOpcode() == TargetOpcode::G_SMULO);
5588 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5589 return false;
5590 Register Dst = MI.getOperand(0).getReg();
5591 Register Carry = MI.getOperand(1).getReg();
5592 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5593 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5594 return false;
5595 MatchInfo = [=](MachineIRBuilder &B) {
5596 B.buildConstant(Dst, 0);
5597 B.buildConstant(Carry, 0);
5598 };
5599 return true;
5600}
5601
5603 BuildFnTy &MatchInfo) const {
5604 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5605 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5606 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5607 MI.getOpcode() == TargetOpcode::G_SADDE ||
5608 MI.getOpcode() == TargetOpcode::G_USUBE ||
5609 MI.getOpcode() == TargetOpcode::G_SSUBE);
5610 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5611 return false;
5612 MatchInfo = [&](MachineIRBuilder &B) {
5613 unsigned NewOpcode;
5614 switch (MI.getOpcode()) {
5615 case TargetOpcode::G_UADDE:
5616 NewOpcode = TargetOpcode::G_UADDO;
5617 break;
5618 case TargetOpcode::G_SADDE:
5619 NewOpcode = TargetOpcode::G_SADDO;
5620 break;
5621 case TargetOpcode::G_USUBE:
5622 NewOpcode = TargetOpcode::G_USUBO;
5623 break;
5624 case TargetOpcode::G_SSUBE:
5625 NewOpcode = TargetOpcode::G_SSUBO;
5626 break;
5627 }
5628 Observer.changingInstr(MI);
5629 MI.setDesc(B.getTII().get(NewOpcode));
5630 MI.removeOperand(4);
5631 Observer.changedInstr(MI);
5632 };
5633 return true;
5634}
5635
5637 BuildFnTy &MatchInfo) const {
5638 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5639 Register Dst = MI.getOperand(0).getReg();
5640 // (x + y) - z -> x (if y == z)
5641 // (x + y) - z -> y (if x == z)
5642 Register X, Y, Z;
5643 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5644 Register ReplaceReg;
5645 int64_t CstX, CstY;
5646 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5648 ReplaceReg = X;
5649 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5651 ReplaceReg = Y;
5652 if (ReplaceReg) {
5653 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5654 return true;
5655 }
5656 }
5657
5658 // x - (y + z) -> 0 - y (if x == z)
5659 // x - (y + z) -> 0 - z (if x == y)
5660 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5661 Register ReplaceReg;
5662 int64_t CstX;
5663 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5665 ReplaceReg = Y;
5666 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5668 ReplaceReg = Z;
5669 if (ReplaceReg) {
5670 MatchInfo = [=](MachineIRBuilder &B) {
5671 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5672 B.buildSub(Dst, Zero, ReplaceReg);
5673 };
5674 return true;
5675 }
5676 }
5677 return false;
5678}
5679
5681 unsigned Opcode = MI.getOpcode();
5682 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5683 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5684 Register Dst = UDivorRem.getReg(0);
5685 Register LHS = UDivorRem.getReg(1);
5686 Register RHS = UDivorRem.getReg(2);
5687 LLT Ty = MRI.getType(Dst);
5688 LLT ScalarTy = Ty.getScalarType();
5689 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5691 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5692
5693 auto &MIB = Builder;
5694
5695 bool UseSRL = false;
5696 SmallVector<Register, 16> Shifts, Factors;
5697 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5698 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5699
5700 auto BuildExactUDIVPattern = [&](const Constant *C) {
5701 // Don't recompute inverses for each splat element.
5702 if (IsSplat && !Factors.empty()) {
5703 Shifts.push_back(Shifts[0]);
5704 Factors.push_back(Factors[0]);
5705 return true;
5706 }
5707
5708 auto *CI = cast<ConstantInt>(C);
5709 APInt Divisor = CI->getValue();
5710 unsigned Shift = Divisor.countr_zero();
5711 if (Shift) {
5712 Divisor.lshrInPlace(Shift);
5713 UseSRL = true;
5714 }
5715
5716 // Calculate the multiplicative inverse modulo BW.
5717 APInt Factor = Divisor.multiplicativeInverse();
5718 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5719 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5720 return true;
5721 };
5722
5723 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5724 // Collect all magic values from the build vector.
5725 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5726 llvm_unreachable("Expected unary predicate match to succeed");
5727
5728 Register Shift, Factor;
5729 if (Ty.isVector()) {
5730 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5731 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5732 } else {
5733 Shift = Shifts[0];
5734 Factor = Factors[0];
5735 }
5736
5737 Register Res = LHS;
5738
5739 if (UseSRL)
5740 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5741
5742 return MIB.buildMul(Ty, Res, Factor);
5743 }
5744
5745 unsigned KnownLeadingZeros =
5746 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5747
5748 bool UseNPQ = false;
5749 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5750 auto BuildUDIVPattern = [&](const Constant *C) {
5751 auto *CI = cast<ConstantInt>(C);
5752 const APInt &Divisor = CI->getValue();
5753
5754 bool SelNPQ = false;
5755 APInt Magic(Divisor.getBitWidth(), 0);
5756 unsigned PreShift = 0, PostShift = 0;
5757
5758 // Magic algorithm doesn't work for division by 1. We need to emit a select
5759 // at the end.
5760 // TODO: Use undef values for divisor of 1.
5761 if (!Divisor.isOne()) {
5762
5763 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5764 // in the dividend exceed the leading zeros for the divisor.
5767 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5768
5769 Magic = std::move(magics.Magic);
5770
5771 assert(magics.PreShift < Divisor.getBitWidth() &&
5772 "We shouldn't generate an undefined shift!");
5773 assert(magics.PostShift < Divisor.getBitWidth() &&
5774 "We shouldn't generate an undefined shift!");
5775 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5776 PreShift = magics.PreShift;
5777 PostShift = magics.PostShift;
5778 SelNPQ = magics.IsAdd;
5779 }
5780
5781 PreShifts.push_back(
5782 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5783 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5784 NPQFactors.push_back(
5785 MIB.buildConstant(ScalarTy,
5786 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5787 : APInt::getZero(EltBits))
5788 .getReg(0));
5789 PostShifts.push_back(
5790 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5791 UseNPQ |= SelNPQ;
5792 return true;
5793 };
5794
5795 // Collect the shifts/magic values from each element.
5796 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5797 (void)Matched;
5798 assert(Matched && "Expected unary predicate match to succeed");
5799
5800 Register PreShift, PostShift, MagicFactor, NPQFactor;
5801 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5802 if (RHSDef) {
5803 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5804 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5805 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5806 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5807 } else {
5808 assert(MRI.getType(RHS).isScalar() &&
5809 "Non-build_vector operation should have been a scalar");
5810 PreShift = PreShifts[0];
5811 MagicFactor = MagicFactors[0];
5812 PostShift = PostShifts[0];
5813 }
5814
5815 Register Q = LHS;
5816 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5817
5818 // Multiply the pre-shifted numerator (derived from LHS) by the magic value.
5819 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5820
5821 if (UseNPQ) {
5822 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5823
5824 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5825 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5826 if (Ty.isVector())
5827 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5828 else
5829 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5830
5831 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5832 }
5833
5834 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5835 auto One = MIB.buildConstant(Ty, 1);
5836 auto IsOne = MIB.buildICmp(
5838 Ty.isScalar() ? LLT::integer(1) : Ty.changeElementType(LLT::integer(1)),
5839 RHS, One);
5840 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5841
5842 if (Opcode == TargetOpcode::G_UREM) {
5843 auto Prod = MIB.buildMul(Ty, ret, RHS);
5844 return MIB.buildSub(Ty, LHS, Prod);
5845 }
5846 return ret;
5847}
5848
5850 unsigned Opcode = MI.getOpcode();
5851 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5852 Register Dst = MI.getOperand(0).getReg();
5853 Register RHS = MI.getOperand(2).getReg();
5854 LLT DstTy = MRI.getType(Dst);
5855
5856 auto &MF = *MI.getMF();
5857 AttributeList Attr = MF.getFunction().getAttributes();
5858 const auto &TLI = getTargetLowering();
5859 LLVMContext &Ctx = MF.getFunction().getContext();
5860 if (DstTy.getScalarSizeInBits() == 1 ||
5861 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5862 return false;
5863
5864 // Don't do this for minsize because the instruction sequence is usually
5865 // larger.
5866 if (MF.getFunction().hasMinSize())
5867 return false;
5868
5869 if (Opcode == TargetOpcode::G_UDIV &&
5871 return matchUnaryPredicate(
5872 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5873 }
5874
5875 auto *RHSDef = MRI.getVRegDef(RHS);
5876 if (!isConstantOrConstantVector(*RHSDef, MRI))
5877 return false;
5878
5879 // Don't do this if the types are not going to be legal.
5880 if (LI) {
5881 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5882 return false;
5883 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5884 return false;
5886 {TargetOpcode::G_ICMP,
5887 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5888 DstTy}}))
5889 return false;
5890 if (Opcode == TargetOpcode::G_UREM &&
5891 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5892 return false;
5893 }
5894
5895 return matchUnaryPredicate(
5896 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5897}
5898
5900 auto *NewMI = buildUDivOrURemUsingMul(MI);
5901 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5902}
5903
5905 unsigned Opcode = MI.getOpcode();
5906 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5907 Register Dst = MI.getOperand(0).getReg();
5908 Register RHS = MI.getOperand(2).getReg();
5909 LLT DstTy = MRI.getType(Dst);
5910 auto SizeInBits = DstTy.getScalarSizeInBits();
5911 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5912
5913 auto &MF = *MI.getMF();
5914 AttributeList Attr = MF.getFunction().getAttributes();
5915 const auto &TLI = getTargetLowering();
5916 LLVMContext &Ctx = MF.getFunction().getContext();
5917 if (DstTy.getScalarSizeInBits() < 3 ||
5918 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5919 return false;
5920
5921 // Don't do this for minsize because the instruction sequence is usually
5922 // larger.
5923 if (MF.getFunction().hasMinSize())
5924 return false;
5925
5926 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5927 if (Opcode == TargetOpcode::G_SDIV &&
5929 return matchUnaryPredicate(
5930 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5931 }
5932
5933 auto *RHSDef = MRI.getVRegDef(RHS);
5934 if (!isConstantOrConstantVector(*RHSDef, MRI))
5935 return false;
5936
5937 // Don't do this if the types are not going to be legal.
5938 if (LI) {
5939 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5940 return false;
5941 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5942 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5943 return false;
5944 if (Opcode == TargetOpcode::G_SREM &&
5945 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5946 return false;
5947 }
5948
5949 return matchUnaryPredicate(
5950 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5951}
5952
5954 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5955 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5956}
5957
5959 unsigned Opcode = MI.getOpcode();
5960 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5961 Opcode == TargetOpcode::G_SREM);
5962 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5963 Register Dst = SDivorRem.getReg(0);
5964 Register LHS = SDivorRem.getReg(1);
5965 Register RHS = SDivorRem.getReg(2);
5966 LLT Ty = MRI.getType(Dst);
5967 LLT ScalarTy = Ty.getScalarType();
5968 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5970 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5971 auto &MIB = Builder;
5972
5973 bool UseSRA = false;
5974 SmallVector<Register, 16> ExactShifts, ExactFactors;
5975
5976 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5977 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5978
5979 auto BuildExactSDIVPattern = [&](const Constant *C) {
5980 // Don't recompute inverses for each splat element.
5981 if (IsSplat && !ExactFactors.empty()) {
5982 ExactShifts.push_back(ExactShifts[0]);
5983 ExactFactors.push_back(ExactFactors[0]);
5984 return true;
5985 }
5986
5987 auto *CI = cast<ConstantInt>(C);
5988 APInt Divisor = CI->getValue();
5989 unsigned Shift = Divisor.countr_zero();
5990 if (Shift) {
5991 Divisor.ashrInPlace(Shift);
5992 UseSRA = true;
5993 }
5994
5995 // Calculate the multiplicative inverse modulo BW.
5996 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5997 APInt Factor = Divisor.multiplicativeInverse();
5998 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5999 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
6000 return true;
6001 };
6002
6003 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
6004 // Collect all magic values from the build vector.
6005 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
6006 (void)Matched;
6007 assert(Matched && "Expected unary predicate match to succeed");
6008
6009 Register Shift, Factor;
6010 if (Ty.isVector()) {
6011 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
6012 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
6013 } else {
6014 Shift = ExactShifts[0];
6015 Factor = ExactFactors[0];
6016 }
6017
6018 Register Res = LHS;
6019
6020 if (UseSRA)
6021 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
6022
6023 return MIB.buildMul(Ty, Res, Factor);
6024 }
6025
6026 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6027
6028 auto BuildSDIVPattern = [&](const Constant *C) {
6029 auto *CI = cast<ConstantInt>(C);
6030 const APInt &Divisor = CI->getValue();
6031
6034 int NumeratorFactor = 0;
6035 int ShiftMask = -1;
6036
6037 if (Divisor.isOne() || Divisor.isAllOnes()) {
6038 // If d is +1/-1, we just multiply the numerator by +1/-1.
6039 NumeratorFactor = Divisor.getSExtValue();
6040 Magics.Magic = 0;
6041 Magics.ShiftAmount = 0;
6042 ShiftMask = 0;
6043 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
6044 // If d > 0 and m < 0, add the numerator.
6045 NumeratorFactor = 1;
6046 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
6047 // If d < 0 and m > 0, subtract the numerator.
6048 NumeratorFactor = -1;
6049 }
6050
6051 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
6052 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
6053 Shifts.push_back(
6054 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
6055 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
6056
6057 return true;
6058 };
6059
6060 // Collect the shifts/magic values from each element.
6061 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
6062 (void)Matched;
6063 assert(Matched && "Expected unary predicate match to succeed");
6064
6065 Register MagicFactor, Factor, Shift, ShiftMask;
6066 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
6067 if (RHSDef) {
6068 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
6069 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
6070 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
6071 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
6072 } else {
6073 assert(MRI.getType(RHS).isScalar() &&
6074 "Non-build_vector operation should have been a scalar");
6075 MagicFactor = MagicFactors[0];
6076 Factor = Factors[0];
6077 Shift = Shifts[0];
6078 ShiftMask = ShiftMasks[0];
6079 }
6080
6081 Register Q = LHS;
6082 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
6083
6084 // (Optionally) Add/subtract the numerator using Factor.
6085 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
6086 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
6087
6088 // Shift right algebraic by shift value.
6089 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
6090
6091 // Extract the sign bit, mask it and add it to the quotient.
6092 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
6093 auto T = MIB.buildLShr(Ty, Q, SignShift);
6094 T = MIB.buildAnd(Ty, T, ShiftMask);
6095 auto ret = MIB.buildAdd(Ty, Q, T);
6096
6097 if (Opcode == TargetOpcode::G_SREM) {
6098 auto Prod = MIB.buildMul(Ty, ret, RHS);
6099 return MIB.buildSub(Ty, LHS, Prod);
6100 }
6101 return ret;
6102}
6103
6105 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
6106 MI.getOpcode() == TargetOpcode::G_UDIV) &&
6107 "Expected SDIV or UDIV");
6108 auto &Div = cast<GenericMachineInstr>(MI);
6109 Register RHS = Div.getReg(2);
6110 auto MatchPow2 = [&](const Constant *C) {
6111 auto *CI = dyn_cast<ConstantInt>(C);
6112 return CI && (CI->getValue().isPowerOf2() ||
6113 (IsSigned && CI->getValue().isNegatedPowerOf2()));
6114 };
6115 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
6116}
6117
6119 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
6120 auto &SDiv = cast<GenericMachineInstr>(MI);
6121 Register Dst = SDiv.getReg(0);
6122 Register LHS = SDiv.getReg(1);
6123 Register RHS = SDiv.getReg(2);
6124 LLT Ty = MRI.getType(Dst);
6126 LLT CCVT = Ty.isVector() ? LLT::vector(Ty.getElementCount(), LLT::integer(1))
6127 : LLT::integer(1);
6128
6129 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
6130 // to the following version:
6131 //
6132 // %c1 = G_CTTZ %rhs
6133 // %inexact = G_SUB $bitwidth, %c1
6134 // %sign = G_ASHR %lhs, $(bitwidth - 1)
6135 // %lshr = G_LSHR %sign, %inexact
6136 // %add = G_ADD %lhs, %lshr
6137 // %ashr = G_ASHR %add, %c1
6138 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
6139 // %zero = G_CONSTANT $0
6140 // %neg = G_NEG %ashr
6141 // %isneg = G_ICMP SLT %rhs, %zero
6142 // %res = G_SELECT %isneg, %neg, %ashr
6143
6144 unsigned BitWidth = Ty.getScalarSizeInBits();
6145 auto Zero = Builder.buildConstant(Ty, 0);
6146
6147 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
6148 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6149 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
6150 // Splat the sign bit into the register
6151 auto Sign = Builder.buildAShr(
6152 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
6153
6154 // Add (LHS < 0) ? abs2 - 1 : 0;
6155 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
6156 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
6157 auto AShr = Builder.buildAShr(Ty, Add, C1);
6158
6159 // Special case: (sdiv X, 1) -> X
6160 // Special Case: (sdiv X, -1) -> 0-X
6161 auto One = Builder.buildConstant(Ty, 1);
6162 auto MinusOne = Builder.buildConstant(Ty, -1);
6163 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
6164 auto IsMinusOne =
6165 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
6166 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
6167 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
6168
6169 // If divided by a positive value, we're done. Otherwise, the result must be
6170 // negated.
6171 auto Neg = Builder.buildNeg(Ty, AShr);
6172 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
6173 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
6174 MI.eraseFromParent();
6175}
6176
6178 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
6179 auto &UDiv = cast<GenericMachineInstr>(MI);
6180 Register Dst = UDiv.getReg(0);
6181 Register LHS = UDiv.getReg(1);
6182 Register RHS = UDiv.getReg(2);
6183 LLT Ty = MRI.getType(Dst);
6185
6186 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6187 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
6188 MI.eraseFromParent();
6189}
6190
6192 assert(MI.getOpcode() == TargetOpcode::G_SREM && "Expected SREM");
6193 auto &SRem = cast<GBinOp>(MI);
6194 Register Dst = SRem.getReg(0);
6195 Register LHS = SRem.getLHSReg();
6196 Register RHS = SRem.getRHSReg();
6197 LLT Ty = MRI.getType(Dst);
6199
6200 // Effectively we want to lower G_SREM %lhs, %rhs, where %rhs is +/- a power
6201 // of 2, to the following branch-free bias-and-mask version:
6202 //
6203 // %abs = G_ABS %rhs
6204 // %mask = G_SUB %abs, 1
6205 // %sign = G_ASHR %lhs, $(bitwidth - 1)
6206 // %bias = G_AND %sign, %mask
6207 // %biased = G_ADD %lhs, %bias
6208 // %masked = G_AND %biased, %mask
6209 // %res = G_SUB %masked, %bias
6210 //
6211 // The bias adds (|%rhs| - 1) for negative %lhs, correcting rounding towards
6212 // zero (instead of towards -inf that a plain mask would give). Constant
6213 // divisors collapse %mask to a single G_CONSTANT via the CSEMIRBuilder folds
6214 // for G_ABS and G_SUB.
6215
6216 unsigned BitWidth = Ty.getScalarSizeInBits();
6217 auto AbsRHS = Builder.buildAbs(Ty, RHS);
6218 auto Mask = Builder.buildSub(Ty, AbsRHS, Builder.buildConstant(Ty, 1));
6219 auto BWMinusOne = Builder.buildConstant(ShiftAmtTy, BitWidth - 1);
6220 auto Sign = Builder.buildAShr(Ty, LHS, BWMinusOne);
6221 auto Bias = Builder.buildAnd(Ty, Sign, Mask);
6222 auto Biased = Builder.buildAdd(Ty, LHS, Bias);
6223 auto Masked = Builder.buildAnd(Ty, Biased, Mask);
6224 Builder.buildSub(Dst, Masked, Bias);
6225 MI.eraseFromParent();
6226}
6227
6229 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
6230 Register RHS = MI.getOperand(2).getReg();
6231 Register Dst = MI.getOperand(0).getReg();
6232 LLT Ty = MRI.getType(Dst);
6233 LLT RHSTy = MRI.getType(RHS);
6235 auto MatchPow2ExceptOne = [&](const Constant *C) {
6236 if (auto *CI = dyn_cast<ConstantInt>(C))
6237 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
6238 return false;
6239 };
6240 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
6241 return false;
6242 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
6243 // get log base 2, and G_CTLZ is not always legal on a target.
6244 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
6245 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
6246}
6247
6249 Register LHS = MI.getOperand(1).getReg();
6250 Register RHS = MI.getOperand(2).getReg();
6251 Register Dst = MI.getOperand(0).getReg();
6252 LLT Ty = MRI.getType(Dst);
6254 unsigned NumEltBits = Ty.getScalarSizeInBits();
6255
6256 auto LogBase2 = buildLogBase2(RHS, Builder);
6257 auto ShiftAmt =
6258 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
6259 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
6260 Builder.buildLShr(Dst, LHS, Trunc);
6261 MI.eraseFromParent();
6262}
6263
6265 Register &MatchInfo) const {
6266 Register Dst = MI.getOperand(0).getReg();
6267 Register Src = MI.getOperand(1).getReg();
6268 LLT DstTy = MRI.getType(Dst);
6269 LLT SrcTy = MRI.getType(Src);
6270 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6271 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6272 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6273
6275 {TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6276 return false;
6277
6278 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6279 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
6280 return mi_match(Src, MRI,
6281 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6282 m_SpecificICstOrSplat(SignedMin)),
6283 m_SpecificICstOrSplat(SignedMax))) ||
6284 mi_match(Src, MRI,
6285 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6286 m_SpecificICstOrSplat(SignedMax)),
6287 m_SpecificICstOrSplat(SignedMin)));
6288}
6289
6291 Register &MatchInfo) const {
6292 Register Dst = MI.getOperand(0).getReg();
6293 Builder.buildTruncSSatS(Dst, MatchInfo);
6294 MI.eraseFromParent();
6295}
6296
6298 Register &MatchInfo) const {
6299 Register Dst = MI.getOperand(0).getReg();
6300 Register Src = MI.getOperand(1).getReg();
6301 LLT DstTy = MRI.getType(Dst);
6302 LLT SrcTy = MRI.getType(Src);
6303 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6304 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6305 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6306
6308 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6309 return false;
6310 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6311 return mi_match(Src, MRI,
6313 m_SpecificICstOrSplat(UnsignedMax))) ||
6314 mi_match(Src, MRI,
6315 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6316 m_SpecificICstOrSplat(UnsignedMax)),
6317 m_SpecificICstOrSplat(0))) ||
6318 mi_match(Src, MRI,
6320 m_SpecificICstOrSplat(UnsignedMax)));
6321}
6322
6324 Register &MatchInfo) const {
6325 Register Dst = MI.getOperand(0).getReg();
6326 Builder.buildTruncSSatU(Dst, MatchInfo);
6327 MI.eraseFromParent();
6328}
6329
6331 MachineInstr &MinMI) const {
6332 Register Min = MinMI.getOperand(2).getReg();
6333 Register Val = MinMI.getOperand(1).getReg();
6334 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6335 LLT SrcTy = MRI.getType(Val);
6336 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6337 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6338 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6339
6341 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6342 return false;
6343 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6344 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6345 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6346}
6347
6349 MachineInstr &SrcMI) const {
6350 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6351 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6352
6353 return LI &&
6354 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6355}
6356
6358 BuildFnTy &MatchInfo) const {
6359 unsigned Opc = MI.getOpcode();
6360 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6361 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6362 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6363
6364 Register Dst = MI.getOperand(0).getReg();
6365 Register X = MI.getOperand(1).getReg();
6366 Register Y = MI.getOperand(2).getReg();
6367 LLT Type = MRI.getType(Dst);
6368
6369 // fold (fadd x, fneg(y)) -> (fsub x, y)
6370 // fold (fadd fneg(y), x) -> (fsub x, y)
6371 // G_FADD is commutative so both cases are checked by m_GFAdd
6372 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6373 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6374 Opc = TargetOpcode::G_FSUB;
6375 }
6376 /// fold (fsub x, fneg(y)) -> (fadd x, y)
6377 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6378 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6379 Opc = TargetOpcode::G_FADD;
6380 }
6381 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6382 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6383 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6384 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
6385 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6386 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6387 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6388 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6389 // no opcode change
6390 } else
6391 return false;
6392
6393 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6394 Observer.changingInstr(MI);
6395 MI.setDesc(B.getTII().get(Opc));
6396 MI.getOperand(1).setReg(X);
6397 MI.getOperand(2).setReg(Y);
6398 Observer.changedInstr(MI);
6399 };
6400 return true;
6401}
6402
6404 Register &MatchInfo) const {
6405 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6406
6407 Register LHS = MI.getOperand(1).getReg();
6408 MatchInfo = MI.getOperand(2).getReg();
6409 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6410
6411 const auto LHSCst = Ty.isVector()
6412 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6414 if (!LHSCst)
6415 return false;
6416
6417 // -0.0 is always allowed
6418 if (LHSCst->Value.isNegZero())
6419 return true;
6420
6421 // +0.0 is only allowed if nsz is set.
6422 if (LHSCst->Value.isPosZero())
6423 return MI.getFlag(MachineInstr::FmNsz);
6424
6425 return false;
6426}
6427
6429 Register &MatchInfo) const {
6430 Register Dst = MI.getOperand(0).getReg();
6431 Builder.buildFNeg(
6432 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6433 eraseInst(MI);
6434}
6435
6436/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6437/// due to global flags or MachineInstr flags.
6438static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6439 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6440 return false;
6441 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6442}
6443
6444static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6445 const MachineRegisterInfo &MRI) {
6446 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6447 MRI.use_instr_nodbg_end()) >
6448 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6449 MRI.use_instr_nodbg_end());
6450}
6451
6453 bool &AllowFusionGlobally,
6454 bool &HasFMAD, bool &Aggressive,
6455 bool CanReassociate) const {
6456
6457 auto *MF = MI.getMF();
6458 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6459 const TargetOptions &Options = MF->getTarget().Options;
6460 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6461
6462 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6463 return false;
6464
6465 // Floating-point multiply-add with intermediate rounding.
6466 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6467 // Floating-point multiply-add without intermediate rounding.
6468 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6469 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6470 // No valid opcode, do not combine.
6471 if (!HasFMAD && !HasFMA)
6472 return false;
6473
6474 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6475 // If the addition is not contractable, do not combine.
6476 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6477 return false;
6478
6479 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6480 return true;
6481}
6482
6485 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6486 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6487
6488 bool AllowFusionGlobally, HasFMAD, Aggressive;
6489 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6490 return false;
6491
6492 Register Op1 = MI.getOperand(1).getReg();
6493 Register Op2 = MI.getOperand(2).getReg();
6494 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6495 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6496 unsigned PreferredFusedOpcode =
6497 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6498
6499 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6500 // prefer to fold the multiply with fewer uses.
6501 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6502 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6503 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6504 std::swap(LHS, RHS);
6505 }
6506
6507 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6508 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6509 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6510 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6511 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6512 {LHS.MI->getOperand(1).getReg(),
6513 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6514 };
6515 return true;
6516 }
6517
6518 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6519 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6520 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6521 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6522 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6523 {RHS.MI->getOperand(1).getReg(),
6524 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6525 };
6526 return true;
6527 }
6528
6529 return false;
6530}
6531
// Matcher for a G_FADD where one operand is a G_FPEXT of a contractable
// G_FMUL. On success, MatchInfo is set to a builder callback that emits
//   (fma|fmad (fpext x), (fpext y), z)
// into the G_FADD's destination register, and true is returned.
// NOTE(review): the listing line(s) carrying the return type, function name
// and the MI parameter are absent here (embedded numbering jumps 6531 -> 6534).
6534 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6535 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6536
6537 bool AllowFusionGlobally, HasFMAD, Aggressive;
6538 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6539 return false;
6540
6541 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6542 Register Op1 = MI.getOperand(1).getReg();
6543 Register Op2 = MI.getOperand(2).getReg();
6544 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6545 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6546 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6547
// G_FMAD is chosen over G_FMA whenever canCombineFMadOrFMA reported HasFMAD.
6548 unsigned PreferredFusedOpcode =
6549 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6550
6551 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6552 // prefer to fold the multiply with fewer uses.
6553 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6554 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6555 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6556 std::swap(LHS, RHS);
6557 }
6558
6559 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
// The target is asked (isFPExtFoldable) whether extending from the fmul's
// operand type to DstType can be folded into the fused opcode.
6560 MachineInstr *FpExtSrc;
6561 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6562 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6563 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6564 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6565 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6566 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6567 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6568 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6569 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6570 };
6571 return true;
6572 }
6573
6574 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6575 // Note: Commutes FADD operands.
6576 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6577 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6578 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6579 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6580 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6581 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6582 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6583 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6584 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6585 };
6586 return true;
6587 }
6588
6589 return false;
6590}
6591
// Matcher for a G_FADD where one operand is already a fused multiply-add whose
// addend (operand 3) is defined by a G_FMUL. On success, MatchInfo is set to a
// builder callback that emits
//   (fma x, y, (fma u, v, z))
// and true is returned.
// NOTE(review): the listing line(s) carrying the return type, function name
// and the MI parameter are absent here (embedded numbering jumps 6591 -> 6594).
6594 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6595 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6596
6597 bool AllowFusionGlobally, HasFMAD, Aggressive;
// NOTE(review): unlike the sibling matchers, a fifth argument `true` is passed
// here; its meaning is defined by canCombineFMadOrFMA, which is not visible.
6598 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6599 return false;
6600
6601 Register Op1 = MI.getOperand(1).getReg();
6602 Register Op2 = MI.getOperand(2).getReg();
6603 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6604 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6605 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6606
6607 unsigned PreferredFusedOpcode =
6608 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6609
6610 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6611 // prefer to fold the multiply with fewer uses.
6612 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6613 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6614 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6615 std::swap(LHS, RHS);
6616 }
6617
// FMA stays null unless one of the two operand shapes below matches; Z is the
// remaining G_FADD operand that becomes the innermost addend.
6618 MachineInstr *FMA = nullptr;
6619 Register Z;
6620 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
// Both the fused op's result and its fmul addend must have exactly one
// non-debug use.
6621 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6622 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6623 TargetOpcode::G_FMUL) &&
6624 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6625 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6626 FMA = LHS.MI;
6627 Z = RHS.Reg;
6628 }
6629 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6630 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6631 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6632 TargetOpcode::G_FMUL) &&
6633 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6634 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6635 Z = LHS.Reg;
6636 FMA = RHS.MI;
6637 }
6638
6639 if (FMA) {
6640 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6641 Register X = FMA->getOperand(1).getReg();
6642 Register Y = FMA->getOperand(2).getReg();
6643 Register U = FMulMI->getOperand(1).getReg();
6644 Register V = FMulMI->getOperand(2).getReg();
6645
// The inner fused op (u, v, z) is built first into a fresh virtual register,
// then consumed as the addend of the outer fused op writing MI's destination.
6646 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6647 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6648 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6649 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6650 {X, Y, InnerFMA});
6651 };
6652 return true;
6653 }
6654
6655 return false;
6656}
6657
// Aggressive-only matcher for a G_FADD that combines a fused multiply-add, a
// G_FPEXT and a contractable G_FMUL (in the four arrangements listed below)
// into two nested fused operations. Returns false immediately unless
// canCombineFMadOrFMA succeeds and reports Aggressive.
// NOTE(review): the listing line(s) carrying the return type, function name
// and the MI parameter are absent here (embedded numbering jumps 6657 -> 6660).
6660 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6661 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6662
6663 bool AllowFusionGlobally, HasFMAD, Aggressive;
6664 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6665 return false;
6666
6667 if (!Aggressive)
6668 return false;
6669
6670 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6671 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6672 Register Op1 = MI.getOperand(1).getReg();
6673 Register Op2 = MI.getOperand(2).getReg();
6674 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6675 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6676
6677 unsigned PreferredFusedOpcode =
6678 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6679
6680 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6681 // prefer to fold the multiply with fewer uses.
// Aggressive is always true at this point because of the early return above.
6682 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6683 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6684 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6685 std::swap(LHS, RHS);
6686 }
6687
6688 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
// NOTE(review): listing line 6690 (the rest of this lambda's parameter list,
// presumably declaring Y and the builder B used below) is missing here.
6689 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6691 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6692 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6693 Register InnerFMA =
6694 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6695 .getReg(0);
6696 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6697 {X, Y, InnerFMA});
6698 };
6699
6700 MachineInstr *FMulMI, *FMAMI;
6701 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6702 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6703 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6704 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6705 m_GFPExt(m_MInstr(FMulMI))) &&
6706 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6707 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6708 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6709 MatchInfo = [=](MachineIRBuilder &B) {
6710 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6711 FMulMI->getOperand(2).getReg(), RHS.Reg,
6712 LHS.MI->getOperand(1).getReg(),
6713 LHS.MI->getOperand(2).getReg(), B);
6714 };
6715 return true;
6716 }
6717
6718 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6719 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6720 // FIXME: This turns two single-precision and one double-precision
6721 // operation into two double-precision operations, which might not be
6722 // interesting for all targets, especially GPUs.
6723 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6724 FMAMI->getOpcode() == PreferredFusedOpcode) {
// This inner FMulMI shadows the function-level FMulMI declared above.
6725 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6726 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6727 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6728 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6729 MatchInfo = [=](MachineIRBuilder &B) {
6730 Register X = FMAMI->getOperand(1).getReg();
6731 Register Y = FMAMI->getOperand(2).getReg();
6732 X = B.buildFPExt(DstType, X).getReg(0);
6733 Y = B.buildFPExt(DstType, Y).getReg(0);
6734 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6735 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6736 };
6737
6738 return true;
6739 }
6740 }
6741
6742 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
6743 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6744 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6745 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6746 m_GFPExt(m_MInstr(FMulMI))) &&
6747 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6748 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6749 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6750 MatchInfo = [=](MachineIRBuilder &B) {
6751 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6752 FMulMI->getOperand(2).getReg(), LHS.Reg,
6753 RHS.MI->getOperand(1).getReg(),
6754 RHS.MI->getOperand(2).getReg(), B);
6755 };
6756 return true;
6757 }
6758
6759 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
6760 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6761 // FIXME: This turns two single-precision and one double-precision
6762 // operation into two double-precision operations, which might not be
6763 // interesting for all targets, especially GPUs.
6764 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6765 FMAMI->getOpcode() == PreferredFusedOpcode) {
6766 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6767 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6768 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6769 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6770 MatchInfo = [=](MachineIRBuilder &B) {
6771 Register X = FMAMI->getOperand(1).getReg();
6772 Register Y = FMAMI->getOperand(2).getReg();
6773 X = B.buildFPExt(DstType, X).getReg(0);
6774 Y = B.buildFPExt(DstType, Y).getReg(0);
6775 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6776 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6777 };
6778 return true;
6779 }
6780 }
6781
6782 return false;
6783}
6784
// Matcher for a G_FSUB where one operand is a contractable G_FMUL. On success,
// MatchInfo is set to a builder callback that emits either
//   (fma x, y, (fneg z))   for (fsub (fmul x, y), z), or
//   (fma (fneg y), z, x)   for (fsub x, (fmul y, z)),
// and true is returned.
// NOTE(review): the listing line(s) carrying the return type, function name
// and the MI parameter are absent here (embedded numbering jumps 6784 -> 6787).
6787 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6788 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6789
6790 bool AllowFusionGlobally, HasFMAD, Aggressive;
6791 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6792 return false;
6793
6794 Register Op1 = MI.getOperand(1).getReg();
6795 Register Op2 = MI.getOperand(2).getReg();
6796 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6797 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6798 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6799
// NOTE(review): the comment below says "fadd" although this function asserts
// G_FSUB. Operands are not swapped here (subtraction is not commutative); a
// flag selects which fold is tried first instead.
6800 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6801 // prefer to fold the multiply with fewer uses.
// NOTE(review): declared `int` but only ever holds true/false.
6802 int FirstMulHasFewerUses = true;
6803 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6804 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6805 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6806 FirstMulHasFewerUses = false;
6807
6808 unsigned PreferredFusedOpcode =
6809 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6810
6811 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
// Without Aggressive, the fmul result must have a single non-debug use.
6812 if (FirstMulHasFewerUses &&
6813 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6814 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6815 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6816 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6817 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6818 {LHS.MI->getOperand(1).getReg(),
6819 LHS.MI->getOperand(2).getReg(), NegZ});
6820 };
6821 return true;
6822 }
6823 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6824 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6825 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6826 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6827 Register NegY =
6828 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6829 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6830 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6831 };
6832 return true;
6833 }
6834
6835 return false;
6836}
6837
// Matcher for a G_FSUB where one operand is a G_FNEG of a contractable G_FMUL.
// On success, MatchInfo is set to a builder callback that emits the fused
// operation described at each fold below, and true is returned.
// NOTE(review): the listing line(s) carrying the return type, function name
// and the MI parameter are absent here (embedded numbering jumps 6837 -> 6840).
6840 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6841 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6842
6843 bool AllowFusionGlobally, HasFMAD, Aggressive;
6844 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6845 return false;
6846
6847 Register LHSReg = MI.getOperand(1).getReg();
6848 Register RHSReg = MI.getOperand(2).getReg();
6849 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6850
6851 unsigned PreferredFusedOpcode =
6852 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6853
6854 MachineInstr *FMulMI;
6855 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
// Without Aggressive, both the fneg result and the fmul result must have a
// single non-debug use.
6856 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6857 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6858 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6859 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6860 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6861 Register NegX =
6862 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6863 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6864 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6865 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6866 };
6867 return true;
6868 }
6869
// The two negations (fsub and fneg) cancel, so no G_FNEG is emitted here.
6870 // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
6871 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6872 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6873 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6874 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6875 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6876 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6877 {FMulMI->getOperand(1).getReg(),
6878 FMulMI->getOperand(2).getReg(), LHSReg});
6879 };
6880 return true;
6881 }
6882
6883 return false;
6884}
6885
// Matcher for a G_FSUB where one operand is a G_FPEXT of a contractable
// G_FMUL. On success, MatchInfo is set to a builder callback that extends the
// multiply operands to the destination type and emits the fused operation
// described at each fold below; true is returned.
// NOTE(review): the listing line(s) carrying the return type, function name
// and the MI parameter are absent here (embedded numbering jumps 6885 -> 6888).
// NOTE(review): unlike the G_FADD fpext matchers in this file, no
// isFPExtFoldable query is made here - confirm that is intended.
6888 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6889 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6890
6891 bool AllowFusionGlobally, HasFMAD, Aggressive;
6892 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6893 return false;
6894
6895 Register LHSReg = MI.getOperand(1).getReg();
6896 Register RHSReg = MI.getOperand(2).getReg();
6897 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6898
6899 unsigned PreferredFusedOpcode =
6900 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6901
6902 MachineInstr *FMulMI;
6903 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
// Without Aggressive, the fpext result must have a single non-debug use.
6904 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6905 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6906 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6907 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6908 Register FpExtX =
6909 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6910 Register FpExtY =
6911 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6912 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6913 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6914 {FpExtX, FpExtY, NegZ});
6915 };
6916 return true;
6917 }
6918
6919 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6920 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6921 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6922 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6923 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6924 Register FpExtY =
6925 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6926 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6927 Register FpExtZ =
6928 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6929 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6930 {NegY, FpExtZ, LHSReg});
6931 };
6932 return true;
6933 }
6934
6935 return false;
6936}
6937
// Matcher for a G_FSUB where one operand is a G_FPEXT/G_FNEG chain (in either
// nesting order) over a contractable G_FMUL. On success, MatchInfo is set to a
// builder callback that emits the fused form described at each fold below;
// true is returned.
// NOTE(review): the listing line(s) carrying the return type, function name
// and the MI parameter are absent here (embedded numbering jumps 6937 -> 6940).
6940 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6941 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6942
6943 bool AllowFusionGlobally, HasFMAD, Aggressive;
6944 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6945 return false;
6946
6947 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6948 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6949 Register LHSReg = MI.getOperand(1).getReg();
6950 Register RHSReg = MI.getOperand(2).getReg();
6951
6952 unsigned PreferredFusedOpcode =
6953 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6954
// Helper: emits (fma|fmad (fpext X), (fpext Y), Z) into Dst.
// NOTE(review): listing line 6956 (the rest of this lambda's parameter list,
// presumably declaring the builder B used below) is missing here.
6955 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6957 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6958 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6959 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6960 };
6961
6962 MachineInstr *FMulMI;
6963 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6964 // (fneg (fma (fpext x), (fpext y), z))
6965 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6966 // (fneg (fma (fpext x), (fpext y), z))
6967 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6968 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6969 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6970 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6971 MRI.getType(FMulMI->getOperand(0).getReg()))) {
// The fused op goes into a fresh register; the final G_FNEG writes MI's
// destination.
6972 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6973 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6974 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6975 FMulMI->getOperand(2).getReg(), RHSReg, B);
6976 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6977 };
6978 return true;
6979 }
6980
6981 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6982 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6983 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6984 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6985 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6986 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6987 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6988 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6989 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6990 FMulMI->getOperand(2).getReg(), LHSReg, B);
6991 };
6992 return true;
6993 }
6994
6995 return false;
6996}
6997
// Matcher for G_FMINNUM/G_FMAXNUM/G_FMINIMUM/G_FMAXIMUM with a constant NaN
// operand. On success, IdxToPropagate receives the index (1 or 2) of the
// operand that should be propagated, and true is returned. Any other opcode
// returns false.
// NOTE(review): the listing line carrying the return type, function name and
// the MI parameter is absent here (embedded numbering jumps 6997 -> 6999).
6999 unsigned &IdxToPropagate) const {
7000 bool PropagateNaN;
7001 switch (MI.getOpcode()) {
7002 default:
7003 return false;
7004 case TargetOpcode::G_FMINNUM:
7005 case TargetOpcode::G_FMAXNUM:
7006 PropagateNaN = false;
7007 break;
7008 case TargetOpcode::G_FMINIMUM:
7009 case TargetOpcode::G_FMAXIMUM:
7010 PropagateNaN = true;
7011 break;
7012 }
7013
// Checks operand Idx for a constant NaN. When found, either that operand
// (PropagateNaN) or the opposite one is selected for propagation.
7014 auto MatchNaN = [&](unsigned Idx) {
7015 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
7016 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
7017 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
7018 return false;
7019 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
7020 return true;
7021 };
7022
// Operand 1 is tried first; operand 2 is only inspected if operand 1 is not a
// constant NaN.
7023 return MatchNaN(1) || MatchNaN(2);
7024}
7025
7026// Combine multiple FDIVs with the same divisor into multiple FMULs by the
7027// reciprocal.
7028// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
// On success, MatchInfo holds MI plus every other eligible G_FDIV by the same
// divisor in MI's basic block. Element 0 is moved to whichever collected
// instruction `dominates` reports as dominating the current element 0.
// NOTE(review): the listing line carrying the return type and function name is
// absent here (embedded numbering jumps 7028 -> 7030).
7030 MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
7031 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
7032
7033 Register X = MI.getOperand(1).getReg();
7034 Register Y = MI.getOperand(2).getReg();
7035
// The transform is only attempted when MI carries the FmArcp flag.
7036 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
7037 return false;
7038
// True when the register is a constant (or constant splat) equal to 1.0 or
// -1.0.
7039 auto IsOne = [this](Register X) {
7040 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
7041 return N0CFP && (N0CFP->isOne() || N0CFP->isMinusOne());
7042 };
7043
7044 // Skip if current node is a reciprocal/fneg-reciprocal.
7045 if (IsOne(X))
7046 return false;
7047
7048 // Exit early if the target does not want this transform or if there can't
7049 // possibly be enough uses of the divisor to make the transform worthwhile.
7050 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
7051 if (!MinUses)
7052 return false;
7053
7054 // Find all FDIV users of the same divisor. For the moment we limit all
7055 // instructions to a single BB and use the first Instr in MatchInfo as the
7056 // dominating position.
7057 MatchInfo.push_back(&MI);
7058 for (auto &U : MRI.use_nodbg_instructions(Y)) {
7059 if (&U == &MI || U.getParent() != MI.getParent())
7060 continue;
// Y must be the divisor and not also the dividend, and the dividend must not
// be +/-1.0.
7061 if (U.getOpcode() == TargetOpcode::G_FDIV &&
7062 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y &&
7063 !IsOne(U.getOperand(1).getReg())) {
// NOTE(review): the comment below mentions "global unsafe math", but only the
// per-instruction FmArcp flag is checked in the visible code.
7064 // This division is eligible for optimization only if global unsafe math
7065 // is enabled or if this division allows reciprocal formation.
7066 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
7067 MatchInfo.push_back(&U);
7068 if (dominates(U, *MatchInfo[0]))
7069 std::swap(MatchInfo[0], MatchInfo.back());
7070 }
7071 }
7072 }
7073
7074 // Now that we have the actual number of divisor uses, make sure it meets
7075 // the minimum threshold specified by the target.
7076 return MatchInfo.size() >= MinUses;
7077}
7078
// Rewrites every G_FDIV in MatchInfo as a G_FMUL by a single shared reciprocal
// (1.0 / divisor) built in front of MatchInfo[0], then erases the original
// divisions.
// NOTE(review): the listing line carrying the return type and function name is
// absent here (embedded numbering jumps 7078 -> 7080).
7080 SmallVector<MachineInstr *> &MatchInfo) const {
7081 // Generate the new div at the position of the first instruction, that we have
7082 // ensured will dominate all other instructions.
7083 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
7084 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
// The reciprocal reuses the divisor operand and the flags of MatchInfo[0].
7085 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
7086 MatchInfo[0]->getOperand(2).getReg(),
7087 MatchInfo[0]->getFlags());
7088
7089 // Replace all found div's with fmul instructions.
// Each G_FMUL is inserted before the division it replaces, writes that
// division's destination register and keeps its flags.
7090 for (MachineInstr *MI : MatchInfo) {
7091 Builder.setInsertPt(*MI->getParent(), MI);
7092 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
7093 Div->getOperand(0).getReg(), MI->getFlags());
7094 MI->eraseFromParent();
7095 }
7096}
7097
// Matcher for a G_ADD of the form A + (B - A) or (B - A) + A. On a match, Src
// is bound to B (the minuend of the G_SUB) and true is returned.
// NOTE(review): the whole signature line is absent from this listing
// (embedded numbering jumps 7097 -> 7099); MI and Src are presumably its
// parameters.
7099 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
7100 Register LHS = MI.getOperand(1).getReg();
7101 Register RHS = MI.getOperand(2).getReg();
7102
7103 // Helper lambda to check for opportunities for
7104 // A + (B - A) -> B
7105 // (B - A) + A -> B
// MaybeSub must be defined by a G_SUB whose subtrahend is MaybeSameReg.
7106 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
7107 Register Reg;
7108 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
7109 Reg == MaybeSameReg;
7110 };
// NOTE(review): Src is written by the first CheckFold even when it ends up
// returning false; callers presumably only read Src on a true result.
7111 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
7112}
7113
// Matcher for build-vector instructions that merely reassemble a value x out
// of its own bitcast pieces (patterns listed below). On success, MatchInfo is
// set to x and true is returned.
// NOTE(review): the listing line carrying the return type, function name and
// the MI parameter is absent here (embedded numbering jumps 7113 -> 7115).
7115 Register &MatchInfo) const {
7116 // This combine folds the following patterns:
7117 //
7118 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
7119 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
7120 // into
7121 // x
7122 // if
7123 // k == sizeof(VecEltTy)/2
7124 // type(x) == type(dst)
7125 //
7126 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
7127 // into
7128 // x
7129 // if
7130 // type(x) == type(dst)
7131
7132 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
7133 LLT DstEltTy = DstVecTy.getElementType();
7134
7135 Register Lo, Hi;
7136
// NOTE(review): listing line 7139 (the pattern argument of this mi_match,
// presumably the one that binds Lo) is missing here.
7137 if (mi_match(
7138 MI, MRI,
7140 MatchInfo = Lo;
7141 return MRI.getType(MatchInfo) == DstVecTy;
7142 }
7143
7144 std::optional<ValueAndVReg> ShiftAmount;
7145 const auto LoPattern = m_GBitcast(m_Reg(Lo));
7146 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
7147 if (mi_match(
7148 MI, MRI,
7149 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
7150 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
// Both halves must come from the same x, and the shift must equal the
// destination element width in bits.
// NOTE(review): the header comment states k == sizeof(VecEltTy)/2 while the
// code compares against DstEltTy.getSizeInBits() - confirm the comment wording.
7151 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
7152 MatchInfo = Lo;
7153 return MRI.getType(MatchInfo) == DstVecTy;
7154 }
7155 }
7156
7157 return false;
7158}
7159
// Matcher for (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y))). Binds MatchInfo to
// x and returns true when x has the same type as the truncation result.
// NOTE(review): the listing line carrying the return type, function name and
// the MI parameter is absent here (embedded numbering jumps 7159 -> 7161).
7161 Register &MatchInfo) const {
7162 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
7163 // if type(x) == type(G_TRUNC)
// The second build-vector operand is matched but not captured.
7164 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7165 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
7166 return false;
7167
7168 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
7169}
7170
// Matcher for (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)). Returns
// true when K equals the bit width of the matched register's type and that
// type equals the truncation result type.
// NOTE(review): the listing line carrying the return type, function name and
// the MI parameter is absent here (embedded numbering jumps 7170 -> 7172).
7172 Register &MatchInfo) const {
7173 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
7174 // y if K == size of vector element type
7175 std::optional<ValueAndVReg> ShiftAmt;
// NOTE(review): listing line 7177 (the first pattern argument, presumably the
// one that binds MatchInfo to y) is missing here.
7176 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7178 m_GCst(ShiftAmt))))
7179 return false;
7180
7181 LLT MatchTy = MRI.getType(MatchInfo);
7182 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
7183 MatchTy == MRI.getType(MI.getOperand(0).getReg());
7184}
7185
7186unsigned CombinerHelper::getFPMinMaxOpcForSelect(
7187 CmpInst::Predicate Pred, LLT DstTy,
7188 SelectPatternNaNBehaviour VsNaNRetVal) const {
7189 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
7190 "Expected a NaN behaviour?");
7191 // Choose an opcode based off of legality or the behaviour when one of the
7192 // LHS/RHS may be NaN.
7193 switch (Pred) {
7194 default:
7195 return 0;
7196 case CmpInst::FCMP_UGT:
7197 case CmpInst::FCMP_UGE:
7198 case CmpInst::FCMP_OGT:
7199 case CmpInst::FCMP_OGE:
7200 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7201 return TargetOpcode::G_FMAXNUM;
7202 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7203 return TargetOpcode::G_FMAXIMUM;
7204 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
7205 return TargetOpcode::G_FMAXNUM;
7206 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
7207 return TargetOpcode::G_FMAXIMUM;
7208 return 0;
7209 case CmpInst::FCMP_ULT:
7210 case CmpInst::FCMP_ULE:
7211 case CmpInst::FCMP_OLT:
7212 case CmpInst::FCMP_OLE:
7213 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7214 return TargetOpcode::G_FMINNUM;
7215 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7216 return TargetOpcode::G_FMINIMUM;
7217 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
7218 return TargetOpcode::G_FMINNUM;
7219 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
7220 return 0;
7221 return TargetOpcode::G_FMINIMUM;
7222 }
7223}
7224
7225CombinerHelper::SelectPatternNaNBehaviour
7226CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
7227 bool IsOrderedComparison) const {
7228 bool LHSSafe = VT->isKnownNeverNaN(LHS);
7229 bool RHSSafe = VT->isKnownNeverNaN(RHS);
7230 // Completely unsafe.
7231 if (!LHSSafe && !RHSSafe)
7232 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
7233 if (LHSSafe && RHSSafe)
7234 return SelectPatternNaNBehaviour::RETURNS_ANY;
7235 // An ordered comparison will return false when given a NaN, so it
7236 // returns the RHS.
7237 if (IsOrderedComparison)
7238 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
7239 : SelectPatternNaNBehaviour::RETURNS_OTHER;
7240 // An unordered comparison will return true when given a NaN, so it
7241 // returns the LHS.
7242 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
7243 : SelectPatternNaNBehaviour::RETURNS_NAN;
7244}
7245
// Tries to turn a select whose condition is a floating-point compare of the
// two selected values into a single FP min/max instruction. On success,
// MatchInfo is set to a builder callback emitting that instruction into Dst,
// and true is returned.
7246bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
7247 Register TrueVal, Register FalseVal,
7248 BuildFnTy &MatchInfo) const {
7249 // Match: select (fcmp cond x, y) x, y
7250 // select (fcmp cond x, y) y, x
7251 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
7252 LLT DstTy = MRI.getType(Dst);
7253 // Bail out early on pointers, since we'll never want to fold to a min/max.
7254 if (DstTy.isPointer())
7255 return false;
7256 // Match a floating point compare with a less-than/greater-than predicate.
7257 // TODO: Allow multiple users of the compare if they are all selects.
7258 CmpInst::Predicate Pred;
7259 Register CmpLHS, CmpRHS;
// NOTE(review): listing line 7261 (the opening of the pattern wrapped around
// m_GFCmp, presumably a single-use matcher given the TODO above) is missing.
7260 if (!mi_match(Cond, MRI,
7262 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
7263 CmpInst::isEquality(Pred))
7264 return false;
7265 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
7266 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
7267 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
7268 return false;
// If the select operands are in the opposite order to the compare operands,
// canonicalise by swapping the compare operands and predicate; the NaN
// behaviour flips with it.
7269 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
7270 std::swap(CmpLHS, CmpRHS);
7271 Pred = CmpInst::getSwappedPredicate(Pred);
7272 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
7273 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
7274 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
7275 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
7276 }
// After canonicalisation the select must choose exactly between the two
// compared registers.
7277 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
7278 return false;
7279 // Decide what type of max/min this should be based off of the predicate.
7280 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
7281 if (!Opc || !isLegal({Opc, {DstTy}}))
7282 return false;
7283 // Comparisons between signed zero and zero may have different results...
7284 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
7285 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
7286 // We don't know if a comparison between two 0s will give us a consistent
7287 // result. Be conservative and only proceed if at least one side is
7288 // non-zero.
// Only FP constants are recognised as "known non-zero" here.
7289 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
7290 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
7291 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
7292 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
7293 return false;
7294 }
7295 }
7296 MatchInfo = [=](MachineIRBuilder &B) {
7297 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
7298 };
7299 return true;
7300}
7301
// Matcher entry point for G_SELECT: unpacks the select's operands, looks
// through a single-use G_TRUNC on the condition, and defers to
// matchFPSelectToMinMax.
// NOTE(review): the listing line carrying the return type, function name and
// the MI parameter is absent here (embedded numbering jumps 7301 -> 7303).
7303 BuildFnTy &MatchInfo) const {
7304 // TODO: Handle integer cases.
7305 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
7306 // Condition may be fed by a truncated compare.
7307 Register Cond = MI.getOperand(1).getReg();
7308 Register MaybeTrunc;
7309 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
7310 Cond = MaybeTrunc;
7311 Register Dst = MI.getOperand(0).getReg();
7312 Register TrueVal = MI.getOperand(2).getReg();
7313 Register FalseVal = MI.getOperand(3).getReg();
7314 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
7315}
7316
// Matcher for an equality G_ICMP that compares X against (X op Y) with op in
// {add, sub, xor}. The rewrite compares Y against zero using the same
// predicate.
// NOTE(review): the listing line carrying the return type, function name and
// the MI parameter is absent here (embedded numbering jumps 7316 -> 7318).
7318 BuildFnTy &MatchInfo) const {
7319 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
7320 // (X + Y) == X --> Y == 0
7321 // (X + Y) != X --> Y != 0
7322 // (X - Y) == X --> Y == 0
7323 // (X - Y) != X --> Y != 0
7324 // (X ^ Y) == X --> Y == 0
7325 // (X ^ Y) != X --> Y != 0
7326 Register Dst = MI.getOperand(0).getReg();
7327 CmpInst::Predicate Pred;
7328 Register X, Y, OpLHS, OpRHS;
// Subtraction is handled separately because it is not commutative: X must be
// the minuend of the G_SUB.
7329 bool MatchedSub = mi_match(
7330 Dst, MRI,
7331 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
7332 if (MatchedSub && X != OpLHS)
7333 return false;
7334 if (!MatchedSub) {
7335 if (!mi_match(Dst, MRI,
7336 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7337 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7338 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7339 return false;
// Y is whichever add/xor operand is not X; it stays an invalid (default)
// Register when X is neither operand.
7340 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7341 }
// NOTE(review): MatchInfo is assigned before the final validity check below;
// callers presumably ignore it when false is returned.
7342 MatchInfo = [=](MachineIRBuilder &B) {
7343 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7344 B.buildICmp(Pred, Dst, Y, Zero);
7345 };
7346 return CmpInst::isEquality(Pred) && Y.isValid();
7347}
7348
7349/// Return the minimum useless shift amount that results in complete loss of the
7350/// source value. Return std::nullopt when it cannot determine a value.
7351static std::optional<unsigned>
7352getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7353 std::optional<int64_t> &Result) {
7354 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7355 Opcode == TargetOpcode::G_ASHR) &&
7356 "Expect G_SHL, G_LSHR or G_ASHR.");
7357 auto SignificantBits = 0;
7358 switch (Opcode) {
7359 case TargetOpcode::G_SHL:
7360 SignificantBits = ValueKB.countMinTrailingZeros();
7361 Result = 0;
7362 break;
7363 case TargetOpcode::G_LSHR:
7364 Result = 0;
7365 SignificantBits = ValueKB.countMinLeadingZeros();
7366 break;
7367 case TargetOpcode::G_ASHR:
7368 if (ValueKB.isNonNegative()) {
7369 SignificantBits = ValueKB.countMinLeadingZeros();
7370 Result = 0;
7371 } else if (ValueKB.isNegative()) {
7372 SignificantBits = ValueKB.countMinLeadingOnes();
7373 Result = -1;
7374 } else {
7375 // Cannot determine shift result.
7376 Result = std::nullopt;
7377 }
7378 break;
7379 default:
7380 break;
7381 }
7382 return ValueKB.getBitWidth() - SignificantBits;
7383}
7384
// Matcher for a shift whose constant shift amount is too large to leave any
// useful bits. MatchInfo is std::nullopt when the amount is at least the
// scalar bit width; otherwise it receives whatever getMinUselessShift reports
// as the collapsed result.
// NOTE(review): the listing line carrying the return type and function name is
// absent here (embedded numbering jumps 7384 -> 7386).
7386 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
7387 Register ShiftVal = MI.getOperand(1).getReg();
7388 Register ShiftReg = MI.getOperand(2).getReg();
7389 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
// Predicate handed to matchUnaryPredicate for the shift-amount register.
7390 auto IsShiftTooBig = [&](const Constant *C) {
7391 auto *CI = dyn_cast<ConstantInt>(C);
7392 if (!CI)
7393 return false;
// Shift amount >= scalar bit width: flagged with no concrete result value.
7394 if (CI->uge(ResTy.getScalarSizeInBits())) {
7395 MatchInfo = std::nullopt;
7396 return true;
7397 }
// Otherwise use the known bits of the shifted value to see whether this
// amount already shifts out everything that is not known fill.
7398 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7399 MI.getOpcode(), MatchInfo);
7400 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7401 };
7402 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7403}
7404
// Decides whether the two source operands of a binary operation should be
// swapped so that a constant (or G_CONSTANT_FOLD_BARRIER) on the left-hand
// side ends up on the right. Returns true only when the LHS is such a value
// and the RHS is not.
// NOTE(review): the whole signature line is absent from this listing
// (embedded numbering jumps 7404 -> 7406); MI is presumably its parameter.
7406 unsigned LHSOpndIdx = 1;
7407 unsigned RHSOpndIdx = 2;
// For these four overflow opcodes the sources are read from operands 2 and 3
// instead of 1 and 2.
7408 switch (MI.getOpcode()) {
7409 case TargetOpcode::G_UADDO:
7410 case TargetOpcode::G_SADDO:
7411 case TargetOpcode::G_UMULO:
7412 case TargetOpcode::G_SMULO:
7413 LHSOpndIdx = 2;
7414 RHSOpndIdx = 3;
7415 break;
7416 default:
7417 break;
7418 }
7419 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7420 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7421 if (!getIConstantVRegVal(LHS, MRI)) {
7422 // Skip commuting if LHS is not a constant. But, LHS may be a
7423 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7424 // have a constant on the RHS.
7425 if (MRI.getVRegDef(LHS)->getOpcode() !=
7426 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7427 return false;
7428 }
7429 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7430 return MRI.getVRegDef(RHS)->getOpcode() !=
7431 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7432 !getIConstantVRegVal(RHS, MRI);
7433}
7434
// Match: operand 1 of \p MI matches m_GFCstOrSplat while operand 2 does not.
// NOTE(review): the signature line of this definition (listing line 7435) is
// absent here; presumably CombinerHelper::matchCommuteFPConstantToRHS taking
// `MachineInstr &MI` -- confirm against the full file.
7436 Register LHS = MI.getOperand(1).getReg();
7437 Register RHS = MI.getOperand(2).getReg();
// Scratch output for the matcher; the matched value itself is not used.
7438 std::optional<FPValueAndVReg> ValAndVReg;
7439 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7440 return false;
// Nothing to gain when the RHS already matches as well.
7441 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7442}
7443
// Swap the registers of the two source operands of \p MI in place. The
// mutation is bracketed by Observer.changingInstr / Observer.changedInstr.
// NOTE(review): the signature line of this definition (listing line 7444) is
// absent here; presumably CombinerHelper::applyCommuteBinOpOperands taking
// `MachineInstr &MI` -- confirm against the full file.
7445 Observer.changingInstr(MI);
7446 unsigned LHSOpndIdx = 1;
7447 unsigned RHSOpndIdx = 2;
// The overflow opcodes below keep their inputs in operands 2 and 3.
7448 switch (MI.getOpcode()) {
7449 case TargetOpcode::G_UADDO:
7450 case TargetOpcode::G_SADDO:
7451 case TargetOpcode::G_UMULO:
7452 case TargetOpcode::G_SMULO:
7453 LHSOpndIdx = 2;
7454 RHSOpndIdx = 3;
7455 break;
7456 default:
7457 break;
7458 }
// Read both registers before overwriting either operand.
7459 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7460 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7461 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7462 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7463 Observer.changedInstr(MI);
7464}
7465
7466bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7467 LLT SrcTy = MRI.getType(Src);
7468 if (SrcTy.isFixedVector())
7469 return isConstantSplatVector(Src, 1, AllowUndefs);
7470 if (SrcTy.isScalar()) {
7471 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7472 return true;
7473 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7474 return IConstant && IConstant->Value == 1;
7475 }
7476 return false; // scalable vector
7477}
7478
7479bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7480 LLT SrcTy = MRI.getType(Src);
7481 if (SrcTy.isFixedVector())
7482 return isConstantSplatVector(Src, 0, AllowUndefs);
7483 if (SrcTy.isScalar()) {
7484 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7485 return true;
7486 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7487 return IConstant && IConstant->Value == 0;
7488 }
7489 return false; // scalable vector
7490}
7491
7492// Ignores COPYs during conformance checks.
7493// FIXME scalable vectors.
/// \returns true if \p Src is defined by a G_BUILD_VECTOR (as found by
/// getOpcodeDef) whose every source is the integer constant \p SplatValue.
/// Sources that are implicit defs are skipped when \p AllowUndefs is true and
/// make the check fail otherwise. A build vector with no sources returns true.
7494bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7495 bool AllowUndefs) const {
7496 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7497 if (!BuildVector)
7498 return false;
7499 unsigned NumSources = BuildVector->getNumSources();
7500
7501 for (unsigned I = 0; I < NumSources; ++I) {
// NOTE(review): the initializer of ImplicitDef (listing line 7503) is absent
// from this listing; presumably it looks up the I-th source -- confirm.
7502 GImplicitDef *ImplicitDef =
7504 if (ImplicitDef && AllowUndefs)
7505 continue;
7506 if (ImplicitDef && !AllowUndefs)
7507 return false;
// NOTE(review): the initializer of IConstant (listing line 7509) is absent
// from this listing; presumably a constant lookup on the I-th source.
7508 std::optional<ValueAndVReg> IConstant =
7510 if (IConstant && IConstant->Value == SplatValue)
7511 continue;
// Non-constant source, or a constant different from SplatValue.
7512 return false;
7513 }
7514 return true;
7515}
7516
7517// Ignores COPYs during lookups.
7518// FIXME scalable vectors
/// \returns the integer constant value of \p Src if it is a constant, or the
/// common value of all sources if \p Src is defined by a G_BUILD_VECTOR whose
/// sources are all the same constant; std::nullopt otherwise (including a
/// build vector with no sources).
7519std::optional<APInt>
7520CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
// Plain constant first.
7521 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7522 if (IConstant)
7523 return IConstant->Value;
7524
7525 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7526 if (!BuildVector)
7527 return std::nullopt;
7528 unsigned NumSources = BuildVector->getNumSources();
7529
// Value of the first source; every later source must agree with it.
7530 std::optional<APInt> Value = std::nullopt;
7531 for (unsigned I = 0; I < NumSources; ++I) {
// NOTE(review): the initializer of IConstant (listing line 7533) is absent
// from this listing; presumably a constant lookup on the I-th source.
7532 std::optional<ValueAndVReg> IConstant =
7534 if (!IConstant)
7535 return std::nullopt;
7536 if (!Value)
7537 Value = IConstant->Value;
7538 else if (*Value != IConstant->Value)
7539 return std::nullopt;
7540 }
7541 return Value;
7542}
7543
7544// FIXME G_SPLAT_VECTOR
/// \returns true if \p Src is an integer constant, or is defined by a
/// G_BUILD_VECTOR all of whose sources are integer constants. Unlike the
/// splat helpers the sources need not be equal. A build vector with no
/// sources returns true.
7545bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7546 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7547 if (IConstant)
7548 return true;
7549
7550 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7551 if (!BuildVector)
7552 return false;
7553
7554 unsigned NumSources = BuildVector->getNumSources();
7555 for (unsigned I = 0; I < NumSources; ++I) {
// NOTE(review): the initializer of IConstant (listing line 7557) is absent
// from this listing; presumably a constant lookup on the I-th source.
7556 std::optional<ValueAndVReg> IConstant =
7558 if (!IConstant)
7559 return false;
7560 }
7561 return true;
7562}
7563
7564// TODO: use knownbits to determine zeros
/// Try to replace a G_SELECT with an s1 condition and two integer-constant
/// arms by extension / not / add / shl / or of the condition.
///
/// The patterns are tried in the order written below and the first one that
/// applies wins. On success the rewrite is stored in \p MatchInfo as a
/// builder callback (which positions the builder at \p Select) and true is
/// returned; the select itself is not modified here.
7565bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7566 BuildFnTy &MatchInfo) const {
7567 uint32_t Flags = Select->getFlags();
7568 Register Dest = Select->getReg(0);
7569 Register Cond = Select->getCondReg();
7570 Register True = Select->getTrueReg();
7571 Register False = Select->getFalseReg();
7572 LLT CondTy = MRI.getType(Select->getCondReg());
7573 LLT TrueTy = MRI.getType(Select->getTrueReg());
7574
7575 // We only do this combine for scalar boolean conditions.
7576 if (CondTy != LLT::scalar(1))
7577 return false;
7578
7579 if (TrueTy.isPointer())
7580 return false;
7581
7582 // Both are scalars.
// NOTE(review): the initializers of TrueOpt and FalseOpt (listing lines 7584
// and 7586) are absent from this listing; presumably constant lookups on the
// True and False registers -- confirm against the full file.
7583 std::optional<ValueAndVReg> TrueOpt =
7585 std::optional<ValueAndVReg> FalseOpt =
7587
7588 if (!TrueOpt || !FalseOpt)
7589 return false;
7590
7591 APInt TrueValue = TrueOpt->Value;
7592 APInt FalseValue = FalseOpt->Value;
7593
7594 // select Cond, 1, 0 --> zext (Cond)
7595 if (TrueValue.isOne() && FalseValue.isZero()) {
7596 MatchInfo = [=](MachineIRBuilder &B) {
7597 B.setInstrAndDebugLoc(*Select);
7598 B.buildZExtOrTrunc(Dest, Cond);
7599 };
7600 return true;
7601 }
7602
7603 // select Cond, -1, 0 --> sext (Cond)
7604 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7605 MatchInfo = [=](MachineIRBuilder &B) {
7606 B.setInstrAndDebugLoc(*Select);
7607 B.buildSExtOrTrunc(Dest, Cond);
7608 };
7609 return true;
7610 }
7611
7612 // select Cond, 0, 1 --> zext (!Cond)
7613 if (TrueValue.isZero() && FalseValue.isOne()) {
7614 MatchInfo = [=](MachineIRBuilder &B) {
7615 B.setInstrAndDebugLoc(*Select);
7616 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7617 B.buildNot(Inner, Cond);
7618 B.buildZExtOrTrunc(Dest, Inner);
7619 };
7620 return true;
7621 }
7622
7623 // select Cond, 0, -1 --> sext (!Cond)
7624 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7625 MatchInfo = [=](MachineIRBuilder &B) {
7626 B.setInstrAndDebugLoc(*Select);
7627 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7628 B.buildNot(Inner, Cond);
7629 B.buildSExtOrTrunc(Dest, Inner);
7630 };
7631 return true;
7632 }
7633
7634 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
// zext(Cond) is 1 when Cond holds, so the sum is C1; otherwise it is C1-1.
7635 if (TrueValue - 1 == FalseValue) {
7636 MatchInfo = [=](MachineIRBuilder &B) {
7637 B.setInstrAndDebugLoc(*Select);
7638 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7639 B.buildZExtOrTrunc(Inner, Cond);
7640 B.buildAdd(Dest, Inner, False);
7641 };
7642 return true;
7643 }
7644
7645 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
// sext(Cond) is -1 when Cond holds, so the sum is C1; otherwise it is C1+1.
7646 if (TrueValue + 1 == FalseValue) {
7647 MatchInfo = [=](MachineIRBuilder &B) {
7648 B.setInstrAndDebugLoc(*Select);
7649 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7650 B.buildSExtOrTrunc(Inner, Cond);
7651 B.buildAdd(Dest, Inner, False);
7652 };
7653 return true;
7654 }
7655
7656 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7657 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7658 MatchInfo = [=](MachineIRBuilder &B) {
7659 B.setInstrAndDebugLoc(*Select);
7660 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7661 B.buildZExtOrTrunc(Inner, Cond);
7662 // The shift amount must be scalar.
7663 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7664 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
7665 B.buildShl(Dest, Inner, ShAmtC, Flags);
7666 };
7667 return true;
7668 }
7669
7670 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7671 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7672 MatchInfo = [=](MachineIRBuilder &B) {
7673 B.setInstrAndDebugLoc(*Select);
7674 Register Not = MRI.createGenericVirtualRegister(CondTy);
7675 B.buildNot(Not, Cond);
7676 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7677 B.buildZExtOrTrunc(Inner, Not);
7678 // The shift amount must be scalar.
7679 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7680 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7681 B.buildShl(Dest, Inner, ShAmtC, Flags);
7682 };
7683 return true;
7684 }
7685
7686 // select Cond, -1, C --> or (sext Cond), C
7687 if (TrueValue.isAllOnes()) {
7688 MatchInfo = [=](MachineIRBuilder &B) {
7689 B.setInstrAndDebugLoc(*Select);
7690 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7691 B.buildSExtOrTrunc(Inner, Cond);
7692 B.buildOr(Dest, Inner, False, Flags);
7693 };
7694 return true;
7695 }
7696
7697 // select Cond, C, -1 --> or (sext (not Cond)), C
7698 if (FalseValue.isAllOnes()) {
7699 MatchInfo = [=](MachineIRBuilder &B) {
7700 B.setInstrAndDebugLoc(*Select);
7701 Register Not = MRI.createGenericVirtualRegister(CondTy);
7702 B.buildNot(Not, Cond);
7703 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7704 B.buildSExtOrTrunc(Inner, Not);
7705 B.buildOr(Dest, Inner, True, Flags);
7706 };
7707 return true;
7708 }
7709
7710 return false;
7711}
7712
7713// TODO: use knownbits to determine zeros
7714bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7715 BuildFnTy &MatchInfo) const {
7716 uint32_t Flags = Select->getFlags();
7717 Register DstReg = Select->getReg(0);
7718 Register Cond = Select->getCondReg();
7719 Register True = Select->getTrueReg();
7720 Register False = Select->getFalseReg();
7721 LLT CondTy = MRI.getType(Select->getCondReg());
7722 LLT TrueTy = MRI.getType(Select->getTrueReg());
7723
7724 // Boolean or fixed vector of booleans.
7725 if (CondTy.isScalableVector() ||
7726 (CondTy.isFixedVector() &&
7727 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7728 CondTy.getScalarSizeInBits() != 1)
7729 return false;
7730
7731 if (CondTy != TrueTy)
7732 return false;
7733
7734 // select Cond, Cond, F --> or Cond, F
7735 // select Cond, 1, F --> or Cond, F
7736 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7737 MatchInfo = [=](MachineIRBuilder &B) {
7738 B.setInstrAndDebugLoc(*Select);
7739 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7740 B.buildZExtOrTrunc(Ext, Cond);
7741 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7742 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7743 };
7744 return true;
7745 }
7746
7747 // select Cond, T, Cond --> and Cond, T
7748 // select Cond, T, 0 --> and Cond, T
7749 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7750 MatchInfo = [=](MachineIRBuilder &B) {
7751 B.setInstrAndDebugLoc(*Select);
7752 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7753 B.buildZExtOrTrunc(Ext, Cond);
7754 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7755 B.buildAnd(DstReg, Ext, FreezeTrue);
7756 };
7757 return true;
7758 }
7759
7760 // select Cond, T, 1 --> or (not Cond), T
7761 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7762 MatchInfo = [=](MachineIRBuilder &B) {
7763 B.setInstrAndDebugLoc(*Select);
7764 // First the not.
7765 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7766 B.buildNot(Inner, Cond);
7767 // Then an ext to match the destination register.
7768 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7769 B.buildZExtOrTrunc(Ext, Inner);
7770 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7771 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7772 };
7773 return true;
7774 }
7775
7776 // select Cond, 0, F --> and (not Cond), F
7777 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7778 MatchInfo = [=](MachineIRBuilder &B) {
7779 B.setInstrAndDebugLoc(*Select);
7780 // First the not.
7781 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7782 B.buildNot(Inner, Cond);
7783 // Then an ext to match the destination register.
7784 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7785 B.buildZExtOrTrunc(Ext, Inner);
7786 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7787 B.buildAnd(DstReg, Ext, FreezeFalse);
7788 };
7789 return true;
7790 }
7791
7792 return false;
7793}
7794
// Match (icmp Pred X, Y) ? X : Y with a non-equality predicate and rewrite it
// as G_UMAX / G_SMAX / G_UMIN / G_SMIN of the select's two arms.
// NOTE(review): the line holding this definition's return type, name and
// first parameter (listing line 7795) is absent here; presumably
// CombinerHelper::matchSelectIMinMax with an operand parameter named MO --
// confirm against the full file.
7796 BuildFnTy &MatchInfo) const {
// Both casts assert: MO must be defined by a G_SELECT whose condition is
// defined by a G_ICMP. Presumably the calling pattern guarantees this.
7797 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7798 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7799
7800 Register DstReg = Select->getReg(0);
7801 Register True = Select->getTrueReg();
7802 Register False = Select->getFalseReg();
7803 LLT DstTy = MRI.getType(DstReg);
7804
7805 if (DstTy.isPointerOrPointerVector())
7806 return false;
7807
7808 // We want to fold the icmp and replace the select.
7809 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7810 return false;
7811
7812 CmpInst::Predicate Pred = Cmp->getCond();
7813 // We need a larger or smaller predicate for
7814 // canonicalization.
7815 if (CmpInst::isEquality(Pred))
7816 return false;
7817
7818 Register CmpLHS = Cmp->getLHSReg();
7819 Register CmpRHS = Cmp->getRHSReg();
7820
7821 // We can swap CmpLHS and CmpRHS for higher hitrate.
7822 if (True == CmpRHS && False == CmpLHS) {
7823 std::swap(CmpLHS, CmpRHS);
7824 Pred = CmpInst::getSwappedPredicate(Pred);
7825 }
7826
7827 // (icmp X, Y) ? X : Y -> integer minmax.
7828 // see matchSelectPattern in ValueTracking.
7829 // Legality between G_SELECT and integer minmax can differ.
7830 if (True != CmpLHS || False != CmpRHS)
7831 return false;
7832
// Strict and non-strict predicates map to the same min/max opcode.
7833 switch (Pred) {
7834 case ICmpInst::ICMP_UGT:
7835 case ICmpInst::ICMP_UGE: {
7836 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7837 return false;
7838 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7839 return true;
7840 }
7841 case ICmpInst::ICMP_SGT:
7842 case ICmpInst::ICMP_SGE: {
7843 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7844 return false;
7845 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7846 return true;
7847 }
7848 case ICmpInst::ICMP_ULT:
7849 case ICmpInst::ICMP_ULE: {
7850 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7851 return false;
7852 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7853 return true;
7854 }
7855 case ICmpInst::ICMP_SLT:
7856 case ICmpInst::ICMP_SLE: {
7857 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7858 return false;
7859 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7860 return true;
7861 }
7862 default:
7863 return false;
7864 }
7865}
7866
7867// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
// NOTE(review): the line holding this definition's return type, name and
// first parameter (listing line 7868) is absent here; presumably
// CombinerHelper::matchSimplifyNegMinMax taking `MachineInstr &MI` --
// confirm against the full file.
7869 BuildFnTy &MatchInfo) const {
7870 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7871 Register DestReg = MI.getOperand(0).getReg();
7872 LLT DestTy = MRI.getType(DestReg);
7873
// X is bound by m_Reg(X) in the min/max matcher; Sub0 captures the register
// holding (neg X), the second min/max operand.
7874 Register X;
7875 Register Sub0;
7876 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7877 if (mi_match(DestReg, MRI,
7878 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7879 m_GSMax(m_Reg(X), NegPattern),
7880 m_GUMin(m_Reg(X), NegPattern),
7881 m_GUMax(m_Reg(X), NegPattern)))))) {
// The matched min/max is the definition of the G_SUB's operand 2.
7882 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7883 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
// Only isLegal is consulted here (not isLegalOrBeforeLegalizer).
7884 if (isLegal({NewOpc, {DestTy}})) {
7885 MatchInfo = [=](MachineIRBuilder &B) {
7886 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7887 };
7888 return true;
7889 }
7890 }
7891
7892 return false;
7893}
7894
7897
// Select combine driver: try the constant-arm fold first, then the
// boolean-to-logic fold; the first one that succeeds fills MatchInfo.
// NOTE(review): the signature and the statement defining `Select` (listing
// lines 7895-7896) are absent from this listing; presumably
// CombinerHelper::matchSelect -- confirm against the full file.
7898 if (tryFoldSelectOfConstants(Select, MatchInfo))
7899 return true;
7900
7901 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7902 return true;
7903
7904 return false;
7905}
7906
7907/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7908/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7909/// into a single comparison using range-based reasoning.
7910/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
7911bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7912 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7913 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7914 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7915 Register DstReg = Logic->getReg(0);
7916 Register LHS = Logic->getLHSReg();
7917 Register RHS = Logic->getRHSReg();
7918 unsigned Flags = Logic->getFlags();
7919
7920 // We need a G_ICMP on the LHS register.
7921 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7922 if (!Cmp1)
7923 return false;
7924
7925 // We need a G_ICMP on the RHS register.
7926 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7927 if (!Cmp2)
7928 return false;
7929
7930 // We want to fold the icmps.
7931 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7932 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7933 return false;
7934
7935 APInt C1;
7936 APInt C2;
// NOTE(review): the initializers of MaybeC1 and MaybeC2 (listing lines 7938
// and 7944) are absent from this listing; presumably constant lookups on the
// RHS of Cmp1 and Cmp2 respectively -- confirm against the full file.
7937 std::optional<ValueAndVReg> MaybeC1 =
7939 if (!MaybeC1)
7940 return false;
7941 C1 = MaybeC1->Value;
7942
7943 std::optional<ValueAndVReg> MaybeC2 =
7945 if (!MaybeC2)
7946 return false;
7947 C2 = MaybeC2->Value;
7948
7949 Register R1 = Cmp1->getLHSReg();
7950 Register R2 = Cmp2->getLHSReg();
7951 CmpInst::Predicate Pred1 = Cmp1->getCond();
7952 CmpInst::Predicate Pred2 = Cmp2->getCond();
7953 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7954 LLT CmpOperandTy = MRI.getType(R1);
7955
7956 if (CmpOperandTy.isPointer())
7957 return false;
7958
7959 // We build ands, adds, and constants of type CmpOperandTy.
7960 // They must be legal to build.
7961 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7962 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7963 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7964 return false;
7965
7966 // Look through add of a constant offset on R1, R2, or both operands. This
7967 // allows us to interpret the R + C' < C'' range idiom into a proper range.
7968 std::optional<APInt> Offset1;
7969 std::optional<APInt> Offset2;
7970 if (R1 != R2) {
7971 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
// NOTE(review): initializer (listing line 7973) absent from this listing;
// presumably a constant lookup on the add's RHS.
7972 std::optional<ValueAndVReg> MaybeOffset1 =
7974 if (MaybeOffset1) {
7975 R1 = Add->getLHSReg();
7976 Offset1 = MaybeOffset1->Value;
7977 }
7978 }
7979 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
// NOTE(review): initializer (listing line 7981) absent from this listing;
// presumably a constant lookup on the add's RHS.
7980 std::optional<ValueAndVReg> MaybeOffset2 =
7982 if (MaybeOffset2) {
7983 R2 = Add->getLHSReg();
7984 Offset2 = MaybeOffset2->Value;
7985 }
7986 }
7987 }
7988
// After stripping offsets both compares must test the same register.
7989 if (R1 != R2)
7990 return false;
7991
7992 // We calculate the icmp ranges including maybe offsets.
// For an AND the predicates are inverted here and the combined range is
// inverted back below, so both opcodes share the union-based logic.
7993 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7994 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7995 if (Offset1)
7996 CR1 = CR1.subtract(*Offset1);
7997
7998 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7999 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
8000 if (Offset2)
8001 CR2 = CR2.subtract(*Offset2);
8002
8003 bool CreateMask = false;
8004 APInt LowerDiff;
8005 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
8006 if (!CR) {
8007 // We need non-wrapping ranges.
8008 if (CR1.isWrappedSet() || CR2.isWrappedSet())
8009 return false;
8010
8011 // Check whether we have equal-size ranges that only differ by one bit.
8012 // In that case we can apply a mask to map one range onto the other.
8013 LowerDiff = CR1.getLower() ^ CR2.getLower();
8014 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
8015 APInt CR1Size = CR1.getUpper() - CR1.getLower();
8016 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
8017 CR1Size != CR2.getUpper() - CR2.getLower())
8018 return false;
8019
// Keep the range with the smaller lower bound; the mask built in the
// callback clears the differing bit of the tested value.
8020 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
8021 CreateMask = true;
8022 }
8023
8024 if (IsAnd)
8025 CR = CR->inverse();
8026
8027 CmpInst::Predicate NewPred;
8028 APInt NewC, Offset;
8029 CR->getEquivalentICmp(NewPred, NewC, Offset);
8030
8031 // We take the result type of one of the original icmps, CmpTy, for
8032 // the icmp to be built. The operand type, CmpOperandTy, is used for
8033 // the other instructions and constants to be built. The types of
8034 // the parameters and output are the same for add and and. CmpTy
8035 // and the type of DstReg might differ. That is why we zext or trunc
8036 // the icmp into the destination register.
8037
// The four branches cover every combination of "mask needed" and
// "non-zero offset"; the final else is therefore unreachable.
8038 MatchInfo = [=](MachineIRBuilder &B) {
8039 if (CreateMask && Offset != 0) {
8040 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
8041 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
8042 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
8043 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
8044 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8045 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
8046 B.buildZExtOrTrunc(DstReg, ICmp);
8047 } else if (CreateMask && Offset == 0) {
8048 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
8049 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
8050 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8051 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
8052 B.buildZExtOrTrunc(DstReg, ICmp);
8053 } else if (!CreateMask && Offset != 0) {
8054 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
8055 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
8056 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8057 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
8058 B.buildZExtOrTrunc(DstReg, ICmp);
8059 } else if (!CreateMask && Offset == 0) {
8060 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8061 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
8062 B.buildZExtOrTrunc(DstReg, ICmp);
8063 } else {
8064 llvm_unreachable("unexpected configuration of CreateMask and Offset");
8065 }
8066 };
8067 return true;
8068}
8069
/// Try to fold a G_AND / G_OR of two G_FCMPs over the same operand pair
/// (possibly in swapped order) into a single G_FCMP, or into a constant when
/// the combined predicate is FCMP_FALSE / FCMP_TRUE. On success the rewrite
/// is stored in \p MatchInfo.
8070bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
8071 BuildFnTy &MatchInfo) const {
// NOTE(review): the assert message below reads "unexpecte xor" (typo).
8072 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpecte xor");
8073 Register DestReg = Logic->getReg(0);
8074 Register LHS = Logic->getLHSReg();
8075 Register RHS = Logic->getRHSReg();
8076 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
8077
8078 // We need a compare on the LHS register.
8079 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
8080 if (!Cmp1)
8081 return false;
8082
8083 // We need a compare on the RHS register.
8084 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
8085 if (!Cmp2)
8086 return false;
8087
8088 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
8089 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
8090
8091 // We build one fcmp, want to fold the fcmps, replace the logic op,
8092 // and the fcmps must have the same shape.
// NOTE(review): the opening of this condition (listing line 8093) is absent
// from this listing; presumably a negated legality query on the G_FCMP
// description that follows -- confirm against the full file.
8094 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
8095 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
8096 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
8097 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
8098 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
8099 return false;
8100
8101 CmpInst::Predicate PredL = Cmp1->getCond();
8102 CmpInst::Predicate PredR = Cmp2->getCond();
8103 Register LHS0 = Cmp1->getLHSReg();
8104 Register LHS1 = Cmp1->getRHSReg();
8105 Register RHS0 = Cmp2->getLHSReg();
8106 Register RHS1 = Cmp2->getRHSReg();
8107
8108 if (LHS0 == RHS1 && LHS1 == RHS0) {
8109 // Swap RHS operands to match LHS.
8110 PredR = CmpInst::getSwappedPredicate(PredR);
8111 std::swap(RHS0, RHS1);
8112 }
8113
8114 if (LHS0 == RHS0 && LHS1 == RHS1) {
8115 // We determine the new predicate.
// The codes from getFCmpCode are combined bitwise (AND for G_AND, OR for
// G_OR) and the result is cast back to a predicate in the callback.
8116 unsigned CmpCodeL = getFCmpCode(PredL);
8117 unsigned CmpCodeR = getFCmpCode(PredR);
8118 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
// The new compare carries the union of both compares' flags.
8119 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
8120 MatchInfo = [=](MachineIRBuilder &B) {
8121 // The fcmp predicates fill the lower part of the enum.
8122 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
// NOTE(review): the second halves of the two conditions below (listing lines
// 8124 and 8128) are absent from this listing -- confirm against the full
// file.
8123 if (Pred == FCmpInst::FCMP_FALSE &&
8125 auto False = B.buildConstant(CmpTy, 0);
8126 B.buildZExtOrTrunc(DestReg, False);
8127 } else if (Pred == FCmpInst::FCMP_TRUE &&
8129 auto True =
8130 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
8131 CmpTy.isVector() /*isVector*/,
8132 true /*isFP*/));
8133 B.buildZExtOrTrunc(DestReg, True);
8134 } else { // We take the predicate without predicate optimizations.
8135 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
8136 B.buildZExtOrTrunc(DestReg, Cmp);
8137 }
8138 };
8139 return true;
8140 }
8141
8142 return false;
8143}
8144
// G_AND combine driver: try the range-based icmp fold first, then the fcmp
// fold; the first one that succeeds fills MatchInfo.
// NOTE(review): the signature line (listing line 8145) is absent from this
// listing; presumably CombinerHelper::matchAnd(MachineInstr &MI,
// BuildFnTy &MatchInfo) -- confirm against the full file.
// The cast asserts that MI is a G_AND.
8146 GAnd *And = cast<GAnd>(&MI);
8147
8148 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
8149 return true;
8150
8151 if (tryFoldLogicOfFCmps(And, MatchInfo))
8152 return true;
8153
8154 return false;
8155}
8156
// G_OR combine driver: try the range-based icmp fold first, then the fcmp
// fold; the first one that succeeds fills MatchInfo.
// NOTE(review): the signature line (listing line 8157) is absent from this
// listing; presumably CombinerHelper::matchOr(MachineInstr &MI,
// BuildFnTy &MatchInfo) -- confirm against the full file.
// The cast asserts that MI is a G_OR.
8158 GOr *Or = cast<GOr>(&MI);
8159
8160 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
8161 return true;
8162
8163 if (tryFoldLogicOfFCmps(Or, MatchInfo))
8164 return true;
8165
8166 return false;
8167}
8168
// Simplify an add-with-overflow (signed or unsigned): dead carry, constant
// canonicalization, constant folding, add-of-zero, reassociation with an
// inner nuw/nsw add, and finally proving "never" / "always" overflows.
// NOTE(review): several lines are absent from this listing: the first
// signature line (8169), the statement defining `Add` (8171), condition
// continuations (8212, 8265) and the `case` labels of both switches.
// Presumably this is CombinerHelper::matchAddOverflow -- confirm against the
// full file before relying on the details below.
8170 BuildFnTy &MatchInfo) const {
8172
8173 // Addo has no flags
8174 Register Dst = Add->getReg(0);
8175 Register Carry = Add->getReg(1);
8176 Register LHS = Add->getLHSReg();
8177 Register RHS = Add->getRHSReg();
8178 bool IsSigned = Add->isSigned();
8179 LLT DstTy = MRI.getType(Dst);
8180 LLT CarryTy = MRI.getType(Carry);
8181
8182 // Fold addo, if the carry is dead -> add, undef.
8183 if (MRI.use_nodbg_empty(Carry) &&
8184 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
8185 MatchInfo = [=](MachineIRBuilder &B) {
8186 B.buildAdd(Dst, LHS, RHS);
8187 B.buildUndef(Carry);
8188 };
8189 return true;
8190 }
8191
8192 // Canonicalize constant to RHS.
8193 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
8194 if (IsSigned) {
8195 MatchInfo = [=](MachineIRBuilder &B) {
8196 B.buildSAddo(Dst, Carry, RHS, LHS);
8197 };
8198 return true;
8199 }
8200 // !IsSigned
8201 MatchInfo = [=](MachineIRBuilder &B) {
8202 B.buildUAddo(Dst, Carry, RHS, LHS);
8203 };
8204 return true;
8205 }
8206
8207 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
8208 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
8209
8210 // Fold addo(c1, c2) -> c3, carry.
// NOTE(review): the end of this condition (listing line 8212) is absent.
8211 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
8213 bool Overflow;
8214 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
8215 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
8216 MatchInfo = [=](MachineIRBuilder &B) {
8217 B.buildConstant(Dst, Result);
8218 B.buildConstant(Carry, Overflow);
8219 };
8220 return true;
8221 }
8222
8223 // Fold (addo x, 0) -> x, no carry
8224 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
8225 MatchInfo = [=](MachineIRBuilder &B) {
8226 B.buildCopy(Dst, LHS);
8227 B.buildConstant(Carry, 0);
8228 };
8229 return true;
8230 }
8231
8232 // Given 2 constant operands whose sum does not overflow:
8233 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
8234 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
8235 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
8236 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
8237 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
8238 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
8239 std::optional<APInt> MaybeAddRHS =
8240 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
8241 if (MaybeAddRHS) {
8242 bool Overflow;
8243 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
8244 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
8245 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
8246 if (IsSigned) {
8247 MatchInfo = [=](MachineIRBuilder &B) {
8248 auto ConstRHS = B.buildConstant(DstTy, NewC);
8249 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8250 };
8251 return true;
8252 }
8253 // !IsSigned
8254 MatchInfo = [=](MachineIRBuilder &B) {
8255 auto ConstRHS = B.buildConstant(DstTy, NewC);
8256 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8257 };
8258 return true;
8259 }
8260 }
// The `;` after the closing brace below is an empty statement.
8261 };
8262
8263 // We try to combine addo to non-overflowing add.
// NOTE(review): the end of this condition (listing line 8265) is absent.
8264 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
8266 return false;
8267
8268 // We try to combine uaddo to non-overflowing add.
8269 if (!IsSigned) {
8270 ConstantRange CRLHS =
8271 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
8272 ConstantRange CRRHS =
8273 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
8274
// NOTE(review): the `case` labels of this switch are absent from this
// listing. From the bodies: the first arm gives up, the second builds a nuw
// add with carry 0, the third builds an add with carry 1.
8275 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
8277 return false;
8279 MatchInfo = [=](MachineIRBuilder &B) {
8280 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8281 B.buildConstant(Carry, 0);
8282 };
8283 return true;
8284 }
8287 MatchInfo = [=](MachineIRBuilder &B) {
8288 B.buildAdd(Dst, LHS, RHS);
8289 B.buildConstant(Carry, 1);
8290 };
8291 return true;
8292 }
8293 }
8294 return false;
8295 }
8296
8297 // We try to combine saddo to non-overflowing add.
8298
8299 // If LHS and RHS each have at least two sign bits, then there is no signed
8300 // overflow.
8301 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
8302 MatchInfo = [=](MachineIRBuilder &B) {
8303 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8304 B.buildConstant(Carry, 0);
8305 };
8306 return true;
8307 }
8308
8309 ConstantRange CRLHS =
8310 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
8311 ConstantRange CRRHS =
8312 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
8313
// NOTE(review): the `case` labels of this switch are absent from this
// listing. From the bodies: the first arm gives up, the second builds an nsw
// add with carry 0, the third builds an add with carry 1.
8314 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
8316 return false;
8318 MatchInfo = [=](MachineIRBuilder &B) {
8319 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8320 B.buildConstant(Carry, 0);
8321 };
8322 return true;
8323 }
8326 MatchInfo = [=](MachineIRBuilder &B) {
8327 B.buildAdd(Dst, LHS, RHS);
8328 B.buildConstant(Carry, 1);
8329 };
8330 return true;
8331 }
8332 }
8333
8334 return false;
8335}
8336
// Run the stored build callback with this helper's Builder, then erase the
// instruction referred to as Root.
// NOTE(review): the first signature line (8337) and the statement defining
// `Root` (8339) are absent from this listing; presumably
// CombinerHelper::applyBuildFnMO, with Root being the instruction that
// defines the operand passed in -- confirm against the full file.
8338 BuildFnTy &MatchInfo) const {
8340 MatchInfo(Builder);
8341 Root->eraseFromParent();
8342}
8343
// NOTE(review): the first signature line (8344) and the return statement
// (8347) are absent from this listing; presumably
// CombinerHelper::matchFPowIExpansion, whose decision depends on Exponent and
// OptForSize -- confirm against the full file.
8345 int64_t Exponent) const {
// Whether the enclosing function carries the optimize-for-size attribute.
8346 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8348}
8349
// Expand a powi-style instruction (first two registers of MI: destination
// and base) with the constant \p Exponent into a chain of G_FMULs, followed
// by a G_FDIV of 1.0 by the product when the exponent is negative.
// NOTE(review): the first signature line (8350) is absent from this listing;
// presumably CombinerHelper::applyExpandFPowI taking `MachineInstr &MI` --
// confirm against the full file.
8351 int64_t Exponent) const {
8352 auto [Dst, Base] = MI.getFirst2Regs();
8353 LLT Ty = MRI.getType(Dst);
8354 int64_t ExpVal = Exponent;
8355
// x^0 == 1.0.
// NOTE(review): this path calls MI.removeFromParent() while the normal path
// at the end calls MI.eraseFromParent() -- confirm the difference is
// intended.
8356 if (ExpVal == 0) {
8357 Builder.buildFConstant(Dst, 1.0);
8358 MI.removeFromParent();
8359 return;
8360 }
8361
// Work on the magnitude; the sign is handled after the loop.
// NOTE(review): negating INT64_MIN would overflow; presumably callers never
// pass it -- confirm.
8362 if (ExpVal < 0)
8363 ExpVal = -ExpVal;
8364
8365 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8366 // to generate the multiply sequence. There are more optimal ways to do this
8367 // (for example, powi(x,15) generates one more multiply than it should), but
8368 // this has the benefit of being both really simple and much better than a
8369 // libcall.
// Res accumulates the product of the squares selected by the set bits of
// ExpVal; CurSquare holds Base^(2^k) at iteration k.
8370 std::optional<SrcOp> Res;
8371 SrcOp CurSquare = Base;
8372 while (ExpVal > 0) {
8373 if (ExpVal & 1) {
8374 if (!Res)
8375 Res = CurSquare;
8376 else
8377 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8378 }
8379
// Note: the square is also built on the last iteration, where it is unused.
8380 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8381 ExpVal >>= 1;
8382 }
8383
8384 // If the original exponent was negative, invert the result, producing
8385 // 1/(x*x*x).
// Only this division carries the flags of the original instruction.
8386 if (Exponent < 0)
8387 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8388 MI.getFlags());
8389
8390 Builder.buildCopy(Dst, *Res);
8391 MI.eraseFromParent();
8392}
8393
// NOTE(review): the first signature line (8394) is absent from this listing;
// presumably CombinerHelper::matchFoldAPlusC1MinusC2 taking
// `const MachineInstr &MI` -- confirm against the full file.
8395 BuildFnTy &MatchInfo) const {
8396 // fold (A+C1)-C2 -> A+(C1-C2)
// Both casts assert: MI must be a G_SUB whose LHS is defined by a G_ADD.
// Presumably the calling pattern guarantees this and that both RHS registers
// are constants.
8397 const GSub *Sub = cast<GSub>(&MI);
8398 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8399
// Only fold when the inner add has no other non-debug user.
8400 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8401 return false;
8402
8403 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8404 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8405
8406 Register Dst = Sub->getReg(0);
8407 LLT DstTy = MRI.getType(Dst);
8408
// The callback captures the Add pointer and reads its LHS when it runs.
8409 MatchInfo = [=](MachineIRBuilder &B) {
8410 auto Const = B.buildConstant(DstTy, C1 - C2);
8411 B.buildAdd(Dst, Add->getLHSReg(), Const);
8412 };
8413
8414 return true;
8415}
8416
// Match step for the fold spelled out on the first body line: the subtraction's
// RHS is the inner add and its LHS supplies C2. Returns true and fills
// MatchInfo on success; returns false without touching MatchInfo otherwise.
// NOTE(review): the first line of this signature is not part of this listing.
// The cast<> calls have no failure result, so the caller presumably guarantees
// the G_SUB / G_ADD shape - confirm against the combine rule.
8418 BuildFnTy &MatchInfo) const {
8419 // fold C2-(A+C1) -> (C2-C1)-A
8420 const GSub *Sub = cast<GSub>(&MI);
8421 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8422
// Give up unless the inner add's result has exactly one non-debug use.
8423 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8424 return false;
8425
// NOTE(review): assumes Sub's LHS and Add's RHS hold integer constants -
// verify against the pattern that invokes this matcher.
8426 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8427 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8428
8429 Register Dst = Sub->getReg(0);
8430 LLT DstTy = MRI.getType(Dst);
8431
// Deferred rewrite: Dst = (C2 - C1) - A. Add is captured as a pointer and is
// dereferenced only when the callback runs.
8432 MatchInfo = [=](MachineIRBuilder &B) {
8433 auto Const = B.buildConstant(DstTy, C2 - C1);
8434 B.buildSub(Dst, Const, Add->getLHSReg());
8435 };
8436
8437 return true;
8438}
8439
// Match step for the fold spelled out on the first body line: two chained
// subtractions of constants are merged into one. Returns true and fills
// MatchInfo on success; returns false without touching MatchInfo otherwise.
// NOTE(review): the first line of this signature is not part of this listing.
// The cast<> calls have no failure result, so the caller presumably guarantees
// that MI and the definition of its LHS are both G_SUB - confirm.
8441 BuildFnTy &MatchInfo) const {
8442 // fold (A-C1)-C2 -> A-(C1+C2)
8443 const GSub *Sub1 = cast<GSub>(&MI);
8444 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8445
// Give up unless the inner subtraction's result has exactly one non-debug use.
8446 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8447 return false;
8448
// NOTE(review): assumes both RHS registers hold integer constants - verify
// against the pattern that invokes this matcher.
8449 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8450 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8451
8452 Register Dst = Sub1->getReg(0);
8453 LLT DstTy = MRI.getType(Dst);
8454
// Deferred rewrite: Dst = A - (C1 + C2). Sub2 is captured as a pointer and is
// dereferenced only when the callback runs.
8455 MatchInfo = [=](MachineIRBuilder &B) {
8456 auto Const = B.buildConstant(DstTy, C1 + C2);
8457 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8458 };
8459
8460 return true;
8461}
8462
// Match step for the fold spelled out on the first body line: the inner
// subtraction has the constant on its LHS. Returns true and fills MatchInfo on
// success; returns false without touching MatchInfo otherwise.
// NOTE(review): the first line of this signature is not part of this listing.
// The cast<> calls have no failure result, so the caller presumably guarantees
// that MI and the definition of its LHS are both G_SUB - confirm.
8464 BuildFnTy &MatchInfo) const {
8465 // fold (C1-A)-C2 -> (C1-C2)-A
8466 const GSub *Sub1 = cast<GSub>(&MI);
8467 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8468
// Give up unless the inner subtraction's result has exactly one non-debug use.
8469 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8470 return false;
8471
// NOTE(review): assumes Sub1's RHS and Sub2's LHS hold integer constants -
// verify against the pattern that invokes this matcher.
8472 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8473 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8474
8475 Register Dst = Sub1->getReg(0);
8476 LLT DstTy = MRI.getType(Dst);
8477
// Deferred rewrite: Dst = (C1 - C2) - A, where A is the inner subtraction's
// RHS. Sub2 is captured as a pointer and dereferenced when the callback runs.
8478 MatchInfo = [=](MachineIRBuilder &B) {
8479 auto Const = B.buildConstant(DstTy, C1 - C2);
8480 B.buildSub(Dst, Const, Sub2->getRHSReg());
8481 };
8482
8483 return true;
8484}
8485
// Match step for the fold spelled out on the first body line: here MI is the
// outer G_ADD and its LHS is defined by a G_SUB. Returns true and fills
// MatchInfo on success; returns false without touching MatchInfo otherwise.
// NOTE(review): the first line of this signature is not part of this listing.
// The cast<> calls have no failure result, so the caller presumably guarantees
// the G_ADD / G_SUB shape - confirm against the combine rule.
8487 BuildFnTy &MatchInfo) const {
8488 // fold ((A-C1)+C2) -> (A+(C2-C1))
8489 const GAdd *Add = cast<GAdd>(&MI);
8490 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8491
// Give up unless the inner subtraction's result has exactly one non-debug use.
8492 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8493 return false;
8494
// NOTE(review): assumes both RHS registers hold integer constants - verify
// against the pattern that invokes this matcher.
8495 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8496 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8497
8498 Register Dst = Add->getReg(0);
8499 LLT DstTy = MRI.getType(Dst);
8500
// Deferred rewrite: Dst = A + (C2 - C1). Sub is captured as a pointer and is
// dereferenced only when the callback runs.
8501 MatchInfo = [=](MachineIRBuilder &B) {
8502 auto Const = B.buildConstant(DstTy, C2 - C1);
8503 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8504 };
8505
8506 return true;
8507}
8508
// Matches G_UNMERGE_VALUES(G_ANYEXT(G_BUILD_VECTOR ...)) with fixed-vector
// results and, on success, stores in MatchInfo a callback that extends every
// build-vector source individually and assembles one small build vector per
// unmerge result (see the worked example below). Returns false, leaving
// MatchInfo untouched, when any precondition fails.
// NOTE(review): the first line of this signature is not part of this listing.
// cast<GUnmerge> has no failure result, so MI is presumably already known to
// be a G_UNMERGE_VALUES - confirm against the combine rule.
8510 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
8511 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8512
// The unmerge source must have exactly one non-debug use.
8513 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8514 return false;
8515
8516 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8517
8518 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8519
8520 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8521 // $any:_(<8 x s16>) = G_ANYEXT $bv
8522 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8523 //
8524 // ->
8525 //
8526 // $any:_(s16) = G_ANYEXT $bv[0]
8527 // $any1:_(s16) = G_ANYEXT $bv[1]
8528 // $any2:_(s16) = G_ANYEXT $bv[2]
8529 // $any3:_(s16) = G_ANYEXT $bv[3]
8530 // $any4:_(s16) = G_ANYEXT $bv[4]
8531 // $any5:_(s16) = G_ANYEXT $bv[5]
8532 // $any6:_(s16) = G_ANYEXT $bv[6]
8533 // $any7:_(s16) = G_ANYEXT $bv[7]
8534 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8535 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8536
8537 // We want to unmerge into vectors.
8538 if (!DstTy.isFixedVector())
8539 return false;
8540
8541 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8542 if (!Any)
8543 return false;
8544
8545 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8546
8547 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8548 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8549
// The build vector's result must also have exactly one non-debug use.
8550 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8551 return false;
8552
// The build-vector sources must split evenly across the unmerge results.
8553 // FIXME: check element types?
8554 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8555 return false;
8556
8557 LLT BigBvTy = MRI.getType(BV->getReg(0));
8558 LLT SmallBvTy = DstTy;
8559 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8560
// NOTE(review): the call that opens this condition (original line 8561) is
// missing from this listing; the visible operands describe a G_BUILD_VECTOR
// producing SmallBvTy from SmallBvElemenTy sources.
8562 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8563 return false;
8564
8565 // We check the legality of scalar anyext.
// NOTE(review): the call that opens this condition (original line 8566) is
// missing from this listing.
8567 {TargetOpcode::G_ANYEXT,
8568 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8569 return false;
8570
// Unmerge and BV are captured as pointers, so both instructions must still
// exist when the callback runs.
8571 MatchInfo = [=](MachineIRBuilder &B) {
8572 // Build into each G_UNMERGE_VALUES def
8573 // a small build vector with anyext from the source build vector.
8574 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
// NOTE(review): the declaration of Ops (original line 8575) is missing from
// this listing; it presumably is a fresh register list per unmerge result.
8576 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
// Result I takes the contiguous run of sources starting at
// I * SmallBvTy.getNumElements().
8577 Register SourceArray =
8578 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8579 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8580 Ops.push_back(AnyExt.getReg(0));
8581 }
8582 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8583 };
8584 };
8585 return true;
8586 };
8587
8588 return false;
8589}
8590
// Builds a copy of a G_SHUFFLE_VECTOR mask in which every index that is at or
// beyond the element count of the first source is replaced by -1. Returns
// false if no index needed replacing; otherwise stores in MatchInfo a callback
// that rebuilds the shuffle on MI's operands 0..2 with the new mask and
// returns true.
// NOTE(review): the first line of this signature is not part of this listing.
// Presumably the combine rule only invokes this when the second source is
// undef - confirm.
8592 BuildFnTy &MatchInfo) const {
8593
8594 bool Changed = false;
8595 auto &Shuffle = cast<GShuffleVector>(MI);
8596 ArrayRef<int> OrigMask = Shuffle.getMask();
8597 SmallVector<int, 16> NewMask;
8598 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
// A non-vector source is treated as having a single element.
8599 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8600 const unsigned NumDstElts = OrigMask.size();
8601 for (unsigned i = 0; i != NumDstElts; ++i) {
8602 int Idx = OrigMask[i];
// Negative indices fail this comparison and are copied through unchanged.
8603 if (Idx >= (int)NumSrcElems) {
8604 Idx = -1;
8605 Changed = true;
8606 }
8607 NewMask.push_back(Idx);
8608 }
8609
8610 if (!Changed)
8611 return false;
8612
// NOTE(review): the default capture is by reference and the callback is stored
// in MatchInfo, so MI must outlive this call. The inner std::move(NewMask)
// acts on a by-copy capture of a non-mutable lambda, so it likely copies
// rather than moves - confirm.
8613 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8614 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8615 std::move(NewMask));
8616 };
8617
8618 return true;
8619}
8620
8621static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8622 const unsigned MaskSize = Mask.size();
8623 for (unsigned I = 0; I < MaskSize; ++I) {
8624 int Idx = Mask[I];
8625 if (Idx < 0)
8626 continue;
8627
8628 if (Idx < (int)NumElems)
8629 Mask[I] = Idx + NumElems;
8630 else
8631 Mask[I] = Idx - NumElems;
8632 }
8633}
8634
// Matches a G_SHUFFLE_VECTOR whose mask reads from exactly one of its two
// sources and, on success, stores in MatchInfo a callback that rebuilds it as a
// shuffle of that one source with a freshly built undef as the second operand.
// Returns false, leaving MatchInfo untouched, in every other case.
// NOTE(review): the first line of this signature is not part of this listing.
8636 BuildFnTy &MatchInfo) const {
8637
8638 auto &Shuffle = cast<GShuffleVector>(MI);
8639 // If either of the two inputs is already undef, don't check the mask again,
8640 // to prevent an infinite loop.
8641 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8642 return false;
8643
8644 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8645 return false;
8646
8647 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8648 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
// NOTE(review): the call that opens this condition (original line 8649) is
// missing from this listing; the visible operands describe a G_SHUFFLE_VECTOR
// with types {DstTy, Src1Ty}.
8650 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8651 return false;
8652
8653 ArrayRef<int> Mask = Shuffle.getMask();
8654 const unsigned NumSrcElems = Src1Ty.getNumElements();
8655
// Classify every non-negative mask entry by the source it selects from:
// entries below NumSrcElems count as source 1, all others as source 2.
8656 bool TouchesSrc1 = false;
8657 bool TouchesSrc2 = false;
8658 const unsigned NumElems = Mask.size();
8659 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8660 if (Mask[Idx] < 0)
8661 continue;
8662
8663 if (Mask[Idx] < (int)NumSrcElems)
8664 TouchesSrc1 = true;
8665 else
8666 TouchesSrc2 = true;
8667 }
8668
// No match when the mask uses both sources, or neither of them.
8669 if (TouchesSrc1 == TouchesSrc2)
8670 return false;
8671
// When only the second source is used, it becomes the new first source and the
// mask copy is remapped with commuteMask.
8672 Register NewSrc1 = Shuffle.getSrc1Reg();
8673 SmallVector<int, 16> NewMask(Mask);
8674 if (TouchesSrc2) {
8675 NewSrc1 = Shuffle.getSrc2Reg();
8676 commuteMask(NewMask, NumSrcElems);
8677 }
8678
// Shuffle is captured by reference, so the instruction must still exist when
// the callback runs; everything else is captured by copy.
8679 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8680 auto Undef = B.buildUndef(Src1Ty);
8681 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8682 };
8683
8684 return true;
8685}
8686
// Uses the known bits of both operands of a subtract-with-carry-out (signed or
// unsigned, per Subo->isSigned()) to decide whether the overflow result is
// already determined. When it is, MatchInfo receives a callback that emits a
// plain G_SUB plus a constant for the carry register, and true is returned.
// NOTE(review): the first line of this signature, the second half of the
// legality condition and every case label of both switches are missing from
// this listing; the arm descriptions below are inferred from the arm bodies.
8688 BuildFnTy &MatchInfo) const {
8689 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8690
8691 Register Dst = Subo->getReg(0);
8692 Register LHS = Subo->getLHSReg();
8693 Register RHS = Subo->getRHSReg();
8694 Register Carry = Subo->getCarryOutReg();
8695 LLT DstTy = MRI.getType(Dst);
8696 LLT CarryTy = MRI.getType(Carry);
8697
8698 // Check legality before known bits.
// NOTE(review): the right-hand operand of this || (original line 8700) is
// missing from this listing.
8699 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8701 return false;
8702
// Value ranges of both operands derived from their known bits, interpreted as
// signed or unsigned to match the instruction.
8703 ConstantRange KBLHS =
8704 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8705 /* IsSigned=*/Subo->isSigned());
8706 ConstantRange KBRHS =
8707 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8708 /* IsSigned=*/Subo->isSigned());
8709
8710 if (Subo->isSigned()) {
8711 // G_SSUBO
8712 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
// First arm: no match (presumably the "may overflow" result).
8714 return false;
// Second arm: subtraction tagged NoSWrap, carry set to 0 (presumably the
// "never overflows" result).
8716 MatchInfo = [=](MachineIRBuilder &B) {
8717 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8718 B.buildConstant(Carry, 0);
8719 };
8720 return true;
8721 }
// Third arm: untagged subtraction, carry set to the target's integer-compare
// "true" value (presumably the "always overflows" results).
8724 MatchInfo = [=](MachineIRBuilder &B) {
8725 B.buildSub(Dst, LHS, RHS);
8726 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8727 /*isVector=*/CarryTy.isVector(),
8728 /*isFP=*/false));
8729 };
8730 return true;
8731 }
8732 }
8733 return false;
8734 }
8735
8736 // G_USUBO
// Same three arms as the signed switch, with NoUWrap in place of NoSWrap.
8737 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8739 return false;
8741 MatchInfo = [=](MachineIRBuilder &B) {
8742 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8743 B.buildConstant(Carry, 0);
8744 };
8745 return true;
8746 }
8749 MatchInfo = [=](MachineIRBuilder &B) {
8750 B.buildSub(Dst, LHS, RHS);
8751 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8752 /*isVector=*/CarryTy.isVector(),
8753 /*isFP=*/false));
8754 };
8755 return true;
8756 }
8757 }
8758
8759 return false;
8760}
8761
8762// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
8763// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1)) -> (ctls x)
//
// CtlzMI must be a G_CTLZ or G_CTLZ_ZERO_POISON (asserted). On success,
// MatchInfo receives a callback that emits G_CTLS on x, followed by an add of 1
// for the first form, and true is returned. Returns false when the result type
// is not a valid scalar, when no LegalizerInfo is available, when the G_CTLS
// legality action is not an accepted one, or when the operand does not have
// the expected shape.
// NOTE(review): the first line of this signature, the accepted case labels of
// the legality switch and parts of both mi_match patterns are missing from
// this listing.
8765 BuildFnTy &MatchInfo) const {
8766 assert((CtlzMI.getOpcode() == TargetOpcode::G_CTLZ ||
8767 CtlzMI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON) &&
8768 "Expected G_CTLZ variant");
8769
8770 const Register Dst = CtlzMI.getOperand(0).getReg();
8771 Register Src = CtlzMI.getOperand(1).getReg();
8772
8773 LLT Ty = MRI.getType(Dst);
8774 LLT SrcTy = MRI.getType(Src);
8775
8776 if (!(Ty.isValid() && Ty.isScalar()))
8777 return false;
8778
8779 if (!LI)
8780 return false;
8781
// Ask the legalizer how it would treat G_CTLS with these result/source types.
8782 SmallVector<LLT, 2> QueryTypes = {Ty, SrcTy};
8783 LegalityQuery Query(TargetOpcode::G_CTLS, QueryTypes);
8784
8785 switch (LI->getAction(Query).Action) {
8786 default:
8787 return false;
// NOTE(review): the case labels that reach this break (original lines
// 8788-8790) are missing from this listing.
8791 break;
8792 }
8793
8794 // Src = or(shl(V, 1), 1) -> Src=V; NeedAdd = False
8795 Register V;
8796 bool NeedAdd = true;
// NOTE(review): the matcher line between these two (original line 8798) is
// missing from this listing.
8797 if (mi_match(Src, MRI,
8799 m_SpecificICst(1))))) {
8800 NeedAdd = false;
8801 Src = V;
8802 }
8803
// NOTE(review): BitWidth comes from the destination type Ty, while the shift
// being matched operates on Src; confirm that both types always have the same
// width when this point is reached.
8804 unsigned BitWidth = Ty.getScalarSizeInBits();
8805
8806 Register X;
// NOTE(review): the matcher lines that bind X (original lines 8808-8809) are
// missing from this listing; the visible tail requires a constant operand
// equal to BitWidth - 1.
8807 if (!mi_match(Src, MRI,
8810 m_SpecificICst(BitWidth - 1)))))))
8811 return false;
8812
8813 MatchInfo = [=](MachineIRBuilder &B) {
// Second form: the result is exactly ctls(x).
8814 if (!NeedAdd) {
8815 B.buildCTLS(Dst, X);
8816 return;
8817 }
8818
// First form: the result is ctls(x) + 1.
8819 auto Ctls = B.buildCTLS(Ty, X);
8820 auto One = B.buildConstant(Ty, 1);
8821
8822 B.buildAdd(Dst, Ctls, One);
8823 };
8824
8825 return true;
8826}
8827
8828// Fold shr ( add ( ext X, ext Y ), 1 ) -> avgfloor ( x, y )
8829// Fold shr ( add ( ext X, ext Y, 1 ), 1 ) -> avgceil ( x, y )
//
// MI must be a G_LSHR or G_ASHR (asserted). The visible body does not inspect
// the shift/add/extend structure itself; it only returns true when X and Y
// have the same type and TargetOpc is legal for that type.
// NOTE(review): the first two lines of this signature (original lines
// 8830-8831) are missing from this listing; presumably the pattern that calls
// this has already matched the operand structure - confirm.
8832 unsigned TargetOpc) const {
8833 assert((MI.getOpcode() == TargetOpcode::G_LSHR ||
8834 MI.getOpcode() == TargetOpcode::G_ASHR) &&
8835 "Expected G_LSHR/G_ASHR");
8836
8837 LLT XTy = MRI.getType(X);
8838 return XTy == MRI.getType(Y) && isLegal({TargetOpc, {XTy}});
8839}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
static constexpr roundingMode rmTowardZero
Definition APFloat.h:349
static const fltSemantics & IEEEdouble()
Definition APFloat.h:298
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:350
const fltSemantics & getSemantics() const
Definition APFloat.h:1552
bool isNaN() const
Definition APFloat.h:1542
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1300
APInt bitcastToAPInt() const
Definition APFloat.h:1436
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1692
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
int32_t exactLogBase2() const
Definition APInt.h:1806
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1084
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1300
bool isMask(unsigned numBits) const
Definition APInt.h:489
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:978
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:755
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:754
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:752
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:753
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:742
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
LLVM_ABI void applyCombineBuildVectorOfBitcast(MachineInstr &MI, SmallVector< Register > &Ops) const
LLVM_ABI void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
LLVM_ABI bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchRepeatedFPDivisor(MachineInstr &MI, SmallVector< MachineInstr * > &MatchInfo) const
LLVM_ABI bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
LLVM_ABI const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
LLVM_ABI void applyPtrAddZero(MachineInstr &MI) const
LLVM_ABI bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
LLVM_ABI void applyUDivOrURemByConst(MachineInstr &MI) const
LLVM_ABI bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
LLVM_ABI void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
LLVM_ABI bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
LLVM_ABI bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
LLVM_ABI bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchAVG(MachineInstr &MI, MachineRegisterInfo &MRI, Register X, Register Y, unsigned TargetOpc) const
LLVM_ABI bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
LLVM_ABI bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
LLVM_ABI bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
LLVM_ABI bool matchPtrAddZero(MachineInstr &MI) const
}
const TargetInstrInfo * TII
LLVM_ABI void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
LLVM_ABI void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
LLVM_ABI bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
LLVM_ABI bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
LLVM_ABI void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
LLVM_ABI bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
LLVM_ABI bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
LLVM_ABI bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
LLVM_ABI void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
LLVM_ABI void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
LLVM_ABI void applyCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
LLVM_ABI bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
LLVM_ABI void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
LLVM_ABI void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
LLVM_ABI void applyCombineMemCpyFamily(MachineInstr &MI, MemCpyFamilyLoweringInfo &MatchInfo) const
LLVM_ABI bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
LLVM_ABI void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
LLVM_ABI bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
LLVM_ABI const DataLayout & getDataLayout() const
LLVM_ABI bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
LLVM_ABI bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
LLVM_ABI bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
LLVM_ABI void applyUMulHToLShr(MachineInstr &MI) const
LLVM_ABI void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
LLVM_ABI bool isLegalOrHasFewerElements(const LegalityQuery &Query) const
LLVM_ABI bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
LLVM_ABI void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
LLVM_ABI bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
LLVM_ABI bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
LLVM_ABI bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
LLVM_ABI bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI const TargetLowering & getTargetLowering() const
LLVM_ABI bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
LLVM_ABI void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
LLVM_ABI void applySDivByPow2(MachineInstr &MI) const
LLVM_ABI void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
LLVM_ABI void applyUDivByPow2(MachineInstr &MI) const
Given a G_UDIV MI expressing an unsigned divide by a pow2 constant, return expressions that impleme...
LLVM_ABI bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
LLVM_ABI bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
LLVM_ABI bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
LLVM_ABI void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
LLVM_ABI bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
LLVM_ABI void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI's bitwidth.
LLVM_ABI void applyCombineCopy(MachineInstr &MI) const
LLVM_ABI bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
LLVM_ABI bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
LLVM_ABI void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
LLVM_ABI bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
LLVM_ABI bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
LLVM_ABI bool matchSextTruncSextLoad(MachineInstr &MI) const
LLVM_ABI bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
LLVM_ABI bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
LLVM_ABI bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) const
LLVM_ABI bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given a G_SDIV MI expressing a signed divide by a pow2 constant, return expressions that implements...
LLVM_ABI bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
LLVM_ABI bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
LLVM_ABI bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
LLVM_ABI bool matchCombineCopy(MachineInstr &MI) const
LLVM_ABI bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
LLVM_ABI void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
LLVM_ABI bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y).
LLVM_ABI bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
LLVM_ABI void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
LLVM_ABI bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
LLVM_ABI void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
LLVM_ABI bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
LLVM_ABI bool matchOrShiftToFunnelShift(MachineInstr &MI, bool AllowScalarConstants, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchRedundantSExtInReg(MachineInstr &MI) const
LLVM_ABI void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
LLVM_ABI void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
LLVM_ABI bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
LLVM_ABI void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
LLVM_ABI bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
LLVM_ABI bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
LLVM_ABI void applyCombineShuffleVector(MachineInstr &MI, ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
LLVM_ABI bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
LLVM_ABI bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
LLVM_ABI void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
LLVM_ABI bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
LLVM_ABI bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
LLVM_ABI bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
LLVM_ABI void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
LLVM_ABI bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
LLVM_ABI bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
LLVM_ABI bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
LLVM_ABI bool isPreLegalize() const
LLVM_ABI bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
LLVM_ABI bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
LLVM_ABI bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
LLVM_ABI bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
LLVM_ABI bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
LLVM_ABI bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
LLVM_ABI bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVM_ABI LLVMContext & getContext() const
LLVM_ABI void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
LLVM_ABI bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
LLVM_ABI bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
LLVM_ABI bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
LLVM_ABI bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
LLVM_ABI bool matchConstantFoldUnaryIntOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Constant fold a unary integer op (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON variants,...
LLVM_ABI void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
LLVM_ABI bool isLegal(const LegalityQuery &Query) const
LLVM_ABI bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
LLVM_ABI bool matchOperandIsKnownToBeAPowerOfTwo(const MachineOperand &MO, bool OrNegative=false) const
Check if operand MO is known to be a power of 2.
LLVM_ABI bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate operands of a commutative binop.
LLVM_ABI void eraseInst(MachineInstr &MI) const
Erase MI.
LLVM_ABI bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
LLVM_ABI void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
LLVM_ABI bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
LLVM_ABI void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
LLVM_ABI void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
LLVM_ABI bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
LLVM_ABI MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given a G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
LLVM_ABI void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
LLVM_ABI bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
LLVM_ABI bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
LLVM_ABI bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is an extend that consumes the result of a load, try to combine it.
LLVM_ABI bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
LLVM_ABI bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
LLVM_ABI void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
LLVM_ABI void applyRotateOutOfRange(MachineInstr &MI) const
LLVM_ABI bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
LLVM_ABI bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
LLVM_ABI bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
LLVM_ABI bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
LLVM_ABI bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
LLVM_ABI bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
LLVM_ABI bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
LLVM_ABI bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
LLVM_ABI void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
LLVM_ABI bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
LLVM_ABI bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
LLVM_ABI bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
LLVM_ABI bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchRotateOutOfRange(MachineInstr &MI) const
LLVM_ABI void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
LLVM_ABI void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
LLVM_ABI bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
LLVM_ABI bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
LLVM_ABI void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
LLVM_ABI bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
LLVM_ABI bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
LLVM_ABI void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI void applySimplifySRemByPow2(MachineInstr &MI) const
Combine G_SREM x, (+/-2^k) to a bias-and-mask sequence.
LLVM_ABI bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
LLVM_ABI bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
LLVM_ABI bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
LLVM_ABI void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
LLVM_ABI void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &UnmergeSrc) const
LLVM_ABI bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
LLVM_ABI void applyFunnelShiftToRotate(MachineInstr &MI) const
LLVM_ABI bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI void applyRepeatedFPDivisor(SmallVector< MachineInstr * > &MatchInfo) const
LLVM_ABI bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
LLVM_ABI bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
LLVM_ABI bool matchBinopWithNeg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold a bitwiseop (~b +/- c) -> a bitwiseop ~(b -/+ c)
LLVM_ABI bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
LLVM_ABI void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
LLVM_ABI bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
LLVM_ABI bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
LLVM_ABI void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
LLVM_ABI bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
LLVM_ABI bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
LLVM_ABI bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
LLVM_ABI bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
LLVM_ABI bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
LLVM_ABI bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
LLVM_ABI bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
LLVM_ABI bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
LLVM_ABI void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
LLVM_ABI bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
LLVM_ABI bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI void applySDivOrSRemByConst(MachineInstr &MI) const
LLVM_ABI bool matchCombineMemCpyFamily(MachineInstr &MI, MemCpyFamilyLoweringInfo &MatchInfo, unsigned MaxLen=0) const
LLVM_ABI MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given a G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
LLVM_ABI bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
LLVM_ABI bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
LLVM_ABI bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
LLVM_ABI bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
LLVM_ABI void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
LLVM_ABI void applyCommuteBinOpOperands(MachineInstr &MI) const
LLVM_ABI void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
LLVM_ABI void applySextTruncSextLoad(MachineInstr &MI) const
LLVM_ABI const MachineFunction & getMachineFunction() const
LLVM_ABI bool matchCombineBuildVectorOfBitcast(MachineInstr &MI, SmallVector< Register > &Ops) const
Combine G_BUILD_VECTOR(G_UNMERGE(G_BITCAST), Undef) to G_BITCAST(G_BUILD_VECTOR(.....
LLVM_ABI bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
LLVM_ABI void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
LLVM_ABI void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
LLVM_ABI bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
LLVM_ABI void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
LLVM_ABI bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
LLVM_ABI void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
LLVM_ABI bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
LLVM_ABI void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValue() const
Definition Constants.h:464
const APFloat & getValueAPF() const
Definition Constants.h:463
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:218
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
unsigned size() const
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:143
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:353
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, Register Dst, Register Src, uint64_t KnownLen, Align Alignment, bool DstAlignCanChange, ArrayRef< LLT > MemOps)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
LLVM_ABI iterator getFirstNonPHI()
Returns an iterator to the first instruction in this block that is not a PHI instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
static use_instr_nodbg_iterator use_instr_nodbg_end()
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(const APInt &RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:573
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1447
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:1987
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:656
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:464
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:297
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1407
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1572
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:744
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1530
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1554
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:497
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1587
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1619
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:675
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:308
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1510
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:203
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
std::tuple< Register, Register, uint64_t, Align, bool, std::vector< LLT > > MemCpyFamilyLoweringInfo
Definition Utils.h:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1440
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:911
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI bool canLowerMemCpyFamily(const MachineInstr &MI, const MachineRegisterInfo &MRI, unsigned MaxLen, Register &Dst, Register &Src, uint64_t &KnownLen, Align &Alignment, bool &DstAlignCanChange, std::vector< LLT > &MemOps)
Matcher for memcpy-like instructions.
Definition Utils.cpp:2153
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:282
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:450
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1543
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1644
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:436
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:472
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI SmallVector< APInt > ConstantFoldUnaryIntOp(unsigned Opcode, LLT DstTy, Register Src, const MachineRegisterInfo &MRI)
Tries to constant fold a unary integer operation (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON vari...
Definition Utils.cpp:948
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:504
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1425
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:242
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:265
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...