1//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
42#include <cmath>
43#include <optional>
44#include <tuple>
45
46#define DEBUG_TYPE "gi-combiner"
47
48using namespace llvm;
49using namespace MIPatternMatch;
50
51// Option to allow testing of the combiner while no targets know about indexed
52// addressing.
53static cl::opt<bool>
54 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
55 cl::desc("Force all indexed operations to be "
56 "legal for the GlobalISel combiner"));
57
62 const LegalizerInfo *LI)
63 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
65 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
66 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
67 (void)this->VT;
68}
69
71 return *Builder.getMF().getSubtarget().getTargetLowering();
72}
73
75 return Builder.getMF();
76}
77
81
82LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
83
84/// \returns The little endian in-memory byte position of byte \p I in a
85/// \p ByteWidth bytes wide type.
86///
87/// E.g. Given a 4-byte type x, x[0] -> byte 0
88static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
89 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
90 return I;
91}
92
93/// Determines the LogBase2 value for a non-null input value using the
94/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
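/// E.g. for a 32-bit scalar value V = 16 (binary 10000), ctlz(V) = 27, so
/// LogBase2(V) = 31 - 27 = 4.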
96 auto &MRI = *MIB.getMRI();
97 LLT Ty = MRI.getType(V);
98 auto Ctlz = MIB.buildCTLZ(Ty, V);
99 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
100 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
101}
102
103/// \returns The big endian in-memory byte position of byte \p I in a
104/// \p ByteWidth bytes wide type.
105///
106/// E.g. Given a 4-byte type x, x[0] -> byte 3
107static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
108 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
109 return ByteWidth - I - 1;
110}
111
112/// Given a map from byte offsets in memory to indices in a load/store,
113/// determine if that map corresponds to a little or big endian byte pattern.
114///
115/// \param MemOffset2Idx maps byte offsets in memory to indices in the load/store.
116/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
117///
118/// \returns true if the map corresponds to a big endian byte pattern, false if
119/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
120///
121/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
122/// are as follows:
123///
124/// AddrOffset Little endian Big endian
125/// 0 0 3
126/// 1 1 2
127/// 2 2 1
128/// 3 3 0
129static std::optional<bool>
131 int64_t LowestIdx) {
132 // Need at least two byte positions to decide on endianness.
133 unsigned Width = MemOffset2Idx.size();
134 if (Width < 2)
135 return std::nullopt;
136 bool BigEndian = true, LittleEndian = true;
137 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
138 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
139 if (MemOffsetAndIdx == MemOffset2Idx.end())
140 return std::nullopt;
141 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
142 assert(Idx >= 0 && "Expected non-negative byte offset?");
143 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
144 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
145 if (!BigEndian && !LittleEndian)
146 return std::nullopt;
147 }
148
149 assert((BigEndian != LittleEndian) &&
150 "Pattern cannot be both big and little endian!");
151 return BigEndian;
152}
153
155
156bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
157 assert(LI && "Must have LegalizerInfo to query isLegal!");
158 return LI->getAction(Query).Action == LegalizeActions::Legal;
159}
160
162 const LegalityQuery &Query) const {
163 return isPreLegalize() || isLegal(Query);
164}
165
167 return isLegal(Query) ||
168 LI->getAction(Query).Action == LegalizeActions::WidenScalar;
169}
170
172 if (!Ty.isVector())
173 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
174 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
175 if (isPreLegalize())
176 return true;
177 LLT EltTy = Ty.getElementType();
178 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
179 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
180}
181
183 Register ToReg) const {
184 Observer.changingAllUsesOfReg(MRI, FromReg);
185
186 if (MRI.constrainRegAttrs(ToReg, FromReg))
187 MRI.replaceRegWith(FromReg, ToReg);
188 else
189 Builder.buildCopy(FromReg, ToReg);
190
191 Observer.finishedChangingAllUsesOfReg();
192}
193
195 MachineOperand &FromRegOp,
196 Register ToReg) const {
197 assert(FromRegOp.getParent() && "Expected an operand in an MI");
198 Observer.changingInstr(*FromRegOp.getParent());
199
200 FromRegOp.setReg(ToReg);
201
202 Observer.changedInstr(*FromRegOp.getParent());
203}
204
206 unsigned ToOpcode) const {
207 Observer.changingInstr(FromMI);
208
209 FromMI.setDesc(Builder.getTII().get(ToOpcode));
210
211 Observer.changedInstr(FromMI);
212}
213
215 return RBI->getRegBank(Reg, MRI, *TRI);
216}
217
219 const RegisterBank *RegBank) const {
220 if (RegBank)
221 MRI.setRegBank(Reg, *RegBank);
222}
223
225 if (matchCombineCopy(MI)) {
227 return true;
228 }
229 return false;
230}
232 if (MI.getOpcode() != TargetOpcode::COPY)
233 return false;
234 Register DstReg = MI.getOperand(0).getReg();
235 Register SrcReg = MI.getOperand(1).getReg();
236 return canReplaceReg(DstReg, SrcReg, MRI);
237}
239 Register DstReg = MI.getOperand(0).getReg();
240 Register SrcReg = MI.getOperand(1).getReg();
241 replaceRegWith(MRI, DstReg, SrcReg);
242 MI.eraseFromParent();
243}
244
246 MachineInstr &MI, BuildFnTy &MatchInfo) const {
247 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
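  // The idea is to push the G_FREEZE onto the single maybe-poison operand of
  // the frozen instruction instead, e.g.:
  //   %x:_(s32) = G_ADD %a, %b
  //   %fr:_(s32) = G_FREEZE %x
  // becomes (when %b is the only operand not known to be non-poison)
  //   %fb:_(s32) = G_FREEZE %b
  //   %x:_(s32) = G_ADD %a, %fb
  // with any poison-generating flags dropped from the G_ADD.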
248 Register DstOp = MI.getOperand(0).getReg();
249 Register OrigOp = MI.getOperand(1).getReg();
250
251 if (!MRI.hasOneNonDBGUse(OrigOp))
252 return false;
253
254 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
255 // Even if only a single operand of the PHI is not guaranteed non-poison,
256 // moving freeze() backwards across a PHI can cause optimization issues for
257 // other users of that operand.
258 //
259 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
260 // the source register is unprofitable because it makes the freeze() more
261 // strict than is necessary (it would affect the whole register instead of
262 // just the subreg being frozen).
263 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
264 return false;
265
266 if (canCreateUndefOrPoison(OrigOp, MRI,
267 /*ConsiderFlagsAndMetadata=*/false))
268 return false;
269
270 std::optional<MachineOperand> MaybePoisonOperand;
271 for (MachineOperand &Operand : OrigDef->uses()) {
272 if (!Operand.isReg())
273 return false;
274
275 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
276 continue;
277
278 if (!MaybePoisonOperand)
279 MaybePoisonOperand = Operand;
280 else {
281 // We have more than one maybe-poison operand. Moving the freeze is
282 // unsafe.
283 return false;
284 }
285 }
286
287 // Eliminate freeze if all operands are guaranteed non-poison.
288 if (!MaybePoisonOperand) {
289 MatchInfo = [=](MachineIRBuilder &B) {
290 Observer.changingInstr(*OrigDef);
291 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
292 Observer.changedInstr(*OrigDef);
293 B.buildCopy(DstOp, OrigOp);
294 };
295 return true;
296 }
297
298 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
299 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
300
301 MatchInfo = [=](MachineIRBuilder &B) mutable {
302 Observer.changingInstr(*OrigDef);
303 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
304 Observer.changedInstr(*OrigDef);
305 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
306 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
308 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
309 Freeze.getReg(0));
310 replaceRegWith(MRI, DstOp, OrigOp);
311 };
312 return true;
313}
314
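// Flatten a G_CONCAT_VECTORS of G_BUILD_VECTOR / G_IMPLICIT_DEF operands into
// a single G_BUILD_VECTOR (or a G_IMPLICIT_DEF if every operand is undef).
// E.g.:
//   %v1:_(<2 x s32>) = G_BUILD_VECTOR %a, %b
//   %v2:_(<2 x s32>) = G_IMPLICIT_DEF
//   %c:_(<4 x s32>) = G_CONCAT_VECTORS %v1, %v2
// becomes
//   %u:_(s32) = G_IMPLICIT_DEF
//   %c:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %u, %u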
317 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
318 "Invalid instruction");
319 bool IsUndef = true;
320 MachineInstr *Undef = nullptr;
321
322 // Walk over all the operands of concat vectors and check if they are
323 // build_vector themselves or undef.
324 // Then collect their operands in Ops.
325 for (const MachineOperand &MO : MI.uses()) {
326 Register Reg = MO.getReg();
327 MachineInstr *Def = MRI.getVRegDef(Reg);
328 assert(Def && "Operand not defined");
329 if (!MRI.hasOneNonDBGUse(Reg))
330 return false;
331 switch (Def->getOpcode()) {
332 case TargetOpcode::G_BUILD_VECTOR:
333 IsUndef = false;
334 // Remember the operands of the build_vector to fold
335 // them into the yet-to-build flattened concat vectors.
336 for (const MachineOperand &BuildVecMO : Def->uses())
337 Ops.push_back(BuildVecMO.getReg());
338 break;
339 case TargetOpcode::G_IMPLICIT_DEF: {
340 LLT OpType = MRI.getType(Reg);
341 // Keep one undef value for all the undef operands.
342 if (!Undef) {
343 Builder.setInsertPt(*MI.getParent(), MI);
344 Undef = Builder.buildUndef(OpType.getScalarType());
345 }
346 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
347 OpType.getScalarType() &&
348 "All undefs should have the same type");
349 // Break the undef vector into as many scalar elements as needed
350 // for the flattening.
351 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
352 EltIdx != EltEnd; ++EltIdx)
353 Ops.push_back(Undef->getOperand(0).getReg());
354 break;
355 }
356 default:
357 return false;
358 }
359 }
360
361 // Bail out if the resulting build_vector would be illegal.
362 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
364 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
365 return false;
366 }
367
368 if (IsUndef)
369 Ops.clear();
370
371 return true;
372}
375 // We determined that the concat_vectors can be flattened.
376 // Generate the flattened build_vector.
377 Register DstReg = MI.getOperand(0).getReg();
378 Builder.setInsertPt(*MI.getParent(), MI);
379 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
380
381 // Note: IsUndef is sort of redundant. We could have determined it by
382 // checking that all Ops are undef. Alternatively, we could have
383 // generated a build_vector of undefs and relied on another combine to
384 // clean that up. For now, given we already gather this information
385 // in matchCombineConcatVectors, just save compile time and issue the
386 // right thing.
387 if (Ops.empty())
388 Builder.buildUndef(NewDstReg);
389 else
390 Builder.buildBuildVector(NewDstReg, Ops);
391 replaceRegWith(MRI, DstReg, NewDstReg);
392 MI.eraseFromParent();
393}
394
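// Lower a G_SHUFFLE_VECTOR by unmerging both sources into scalars and
// rebuilding the result with a G_BUILD_VECTOR (or a plain copy for a single
// element), using a scalar G_IMPLICIT_DEF for -1 (undef) mask entries.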
396 auto &Shuffle = cast<GShuffleVector>(MI);
397
398 Register SrcVec1 = Shuffle.getSrc1Reg();
399 Register SrcVec2 = Shuffle.getSrc2Reg();
400 LLT EltTy = MRI.getType(SrcVec1).getElementType();
401 int Width = MRI.getType(SrcVec1).getNumElements();
402
403 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
404 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
405
406 SmallVector<Register> Extracts;
407 // Select only applicable elements from unmerged values.
408 for (int Val : Shuffle.getMask()) {
409 if (Val == -1)
410 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
411 else if (Val < Width)
412 Extracts.push_back(Unmerge1.getReg(Val));
413 else
414 Extracts.push_back(Unmerge2.getReg(Val - Width));
415 }
416 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
417 if (Extracts.size() == 1)
418 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
419 else
420 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
421 MI.eraseFromParent();
422}
423
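// Fold a shuffle of two G_CONCAT_VECTORS into a G_CONCAT_VECTORS of their
// sources when the mask selects whole source registers. E.g. with <2 x s32>
// concat sources:
//   %c1:_(<4 x s32>) = G_CONCAT_VECTORS %a, %b
//   %c2:_(<4 x s32>) = G_CONCAT_VECTORS %c, %d
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %c1, %c2, shufflemask(2, 3, 4, 5)
// can be rewritten as
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %b, %c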
426 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
427 auto ConcatMI1 =
428 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
429 auto ConcatMI2 =
430 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
431 if (!ConcatMI1 || !ConcatMI2)
432 return false;
433
434 // Check that the sources of the Concat instructions have the same type
435 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
436 MRI.getType(ConcatMI2->getSourceReg(0)))
437 return false;
438
439 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
440 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
441 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
442 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
443 // Check if the index takes a whole source register from G_CONCAT_VECTORS
444 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
445 if (Mask[i] == -1) {
446 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
447 if (i + j >= Mask.size())
448 return false;
449 if (Mask[i + j] != -1)
450 return false;
451 }
453 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
454 return false;
455 Ops.push_back(0);
456 } else if (Mask[i] % ConcatSrcNumElt == 0) {
457 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
458 if (i + j >= Mask.size())
459 return false;
460 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
461 return false;
462 }
463 // Retrieve the source register from its respective G_CONCAT_VECTORS
464 // instruction
465 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
466 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
467 } else {
468 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
469 ConcatMI1->getNumSources()));
470 }
471 } else {
472 return false;
473 }
474 }
475
477 {TargetOpcode::G_CONCAT_VECTORS,
478 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
479 return false;
480
481 return !Ops.empty();
482}
483
486 LLT SrcTy;
487 for (Register &Reg : Ops) {
488 if (Reg != 0)
489 SrcTy = MRI.getType(Reg);
490 }
491 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
492
493 Register UndefReg = 0;
494
495 for (Register &Reg : Ops) {
496 if (Reg == 0) {
497 if (UndefReg == 0)
498 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
499 Reg = UndefReg;
500 }
501 }
502
503 if (Ops.size() > 1)
504 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
505 else
506 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
507 MI.eraseFromParent();
508}
509
514 return true;
515 }
516 return false;
517}
518
521 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
522 "Invalid instruction kind");
523 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
524 Register Src1 = MI.getOperand(1).getReg();
525 LLT SrcType = MRI.getType(Src1);
526
527 unsigned DstNumElts = DstType.getNumElements();
528 unsigned SrcNumElts = SrcType.getNumElements();
529
530 // If the resulting vector is smaller than the size of the source
531 // vectors being concatenated, we won't be able to replace the
532 // shuffle vector with a concat_vectors.
533 //
534 // Note: We may still be able to produce a concat_vectors fed by
535 // extract_vector_elt and so on. It is less clear that would
536 // be better though, so don't bother for now.
537 //
538 // If the destination is a scalar, the size of the sources doesn't
539 // matter; we will lower the shuffle to a plain copy. This will
540 // work only if the source and destination have the same size. But
541 // that's covered by the next condition.
542 //
543 // TODO: If the sizes of the source and destination don't match
544 // we could still emit an extract vector element in that case.
545 if (DstNumElts < 2 * SrcNumElts)
546 return false;
547
548 // Check that the shuffle mask can be broken evenly between the
549 // different sources.
550 if (DstNumElts % SrcNumElts != 0)
551 return false;
552
553 // Mask length is a multiple of the source vector length.
554 // Check if the shuffle is some kind of concatenation of the input
555 // vectors.
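  // For example:
  //   %d:_(<4 x s16>) = G_SHUFFLE_VECTOR %x:_(<2 x s16>), %y:_(<2 x s16>),
  //                                      shufflemask(0, 1, 2, 3)
  // is such a concatenation and becomes
  //   %d:_(<4 x s16>) = G_CONCAT_VECTORS %x, %y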
556 unsigned NumConcat = DstNumElts / SrcNumElts;
557 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
558 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
559 for (unsigned i = 0; i != DstNumElts; ++i) {
560 int Idx = Mask[i];
561 // Undef value.
562 if (Idx < 0)
563 continue;
564 // Ensure the indices in each SrcType sized piece are sequential and that
565 // the same source is used for the whole piece.
566 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
567 (ConcatSrcs[i / SrcNumElts] >= 0 &&
568 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
569 return false;
570 // Remember which source this index came from.
571 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
572 }
573
574 // The shuffle is concatenating multiple vectors together.
575 // Collect the different operands for that.
576 Register UndefReg;
577 Register Src2 = MI.getOperand(2).getReg();
578 for (auto Src : ConcatSrcs) {
579 if (Src < 0) {
580 if (!UndefReg) {
581 Builder.setInsertPt(*MI.getParent(), MI);
582 UndefReg = Builder.buildUndef(SrcType).getReg(0);
583 }
584 Ops.push_back(UndefReg);
585 } else if (Src == 0)
586 Ops.push_back(Src1);
587 else
588 Ops.push_back(Src2);
589 }
590 return true;
591}
592
594 ArrayRef<Register> Ops) const {
595 Register DstReg = MI.getOperand(0).getReg();
596 Builder.setInsertPt(*MI.getParent(), MI);
597 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
598
599 if (Ops.size() == 1)
600 Builder.buildCopy(NewDstReg, Ops[0]);
601 else
602 Builder.buildMergeLikeInstr(NewDstReg, Ops);
603
604 replaceRegWith(MRI, DstReg, NewDstReg);
605 MI.eraseFromParent();
606}
607
608namespace {
609
610/// Select a preference between two uses. CurrentUse is the current preference
611/// while the *ForCandidate values describe the candidate under consideration.
612PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
613 PreferredTuple &CurrentUse,
614 const LLT TyForCandidate,
615 unsigned OpcodeForCandidate,
616 MachineInstr *MIForCandidate) {
617 if (!CurrentUse.Ty.isValid()) {
618 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
619 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
620 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
621 return CurrentUse;
622 }
623
624 // We permit the extend to hoist through basic blocks but this is only
625 // sensible if the target has extending loads. If you end up lowering back
626 // into a load and extend during the legalizer then the end result is
627 // hoisting the extend up to the load.
628
629 // Prefer defined extensions to undefined extensions as these are more
630 // likely to reduce the number of instructions.
631 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
632 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
633 return CurrentUse;
634 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
635 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
636 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
637
638 // Prefer sign extensions to zero extensions: a standalone sign extension
639 // tends to be more expensive, so folding it into the load saves more. Don't
640 // do this if the load is already a zero-extend load though, otherwise we'll
641 // rewrite a zero-extend load into a sign-extend load later.
642 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
643 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
644 OpcodeForCandidate == TargetOpcode::G_ZEXT)
645 return CurrentUse;
646 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
647 OpcodeForCandidate == TargetOpcode::G_SEXT)
648 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
649 }
650
651 // This is potentially target specific. We've chosen the largest type
652 // because G_TRUNC is usually free. One potential catch with this is that
653 // some targets have a reduced number of larger registers than smaller
654 // registers and this choice potentially increases the live-range for the
655 // larger value.
656 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
657 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
658 }
659 return CurrentUse;
660}
661
662/// Find a suitable place to insert some instructions and insert them. This
663/// function accounts for special cases like inserting before a PHI node.
664/// The current strategy for inserting before PHIs is to duplicate the
665/// instructions for each predecessor. However, while that's ok for G_TRUNC
666/// on most targets since it generally requires no code, other targets/cases may
667/// want to try harder to find a dominating block.
668static void InsertInsnsWithoutSideEffectsBeforeUse(
671 MachineOperand &UseMO)>
672 Inserter) {
673 MachineInstr &UseMI = *UseMO.getParent();
674
675 MachineBasicBlock *InsertBB = UseMI.getParent();
676
677 // If the use is a PHI then we want the predecessor block instead.
678 if (UseMI.isPHI()) {
679 MachineOperand *PredBB = std::next(&UseMO);
680 InsertBB = PredBB->getMBB();
681 }
682
683 // If the block is the same block as the def then we want to insert just after
684 // the def instead of at the start of the block.
685 if (InsertBB == DefMI.getParent()) {
687 Inserter(InsertBB, std::next(InsertPt), UseMO);
688 return;
689 }
690
691 // Otherwise we want the start of the BB
692 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
693}
694} // end anonymous namespace
695
697 PreferredTuple Preferred;
698 if (matchCombineExtendingLoads(MI, Preferred)) {
699 applyCombineExtendingLoads(MI, Preferred);
700 return true;
701 }
702 return false;
703}
704
705static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
706 unsigned CandidateLoadOpc;
707 switch (ExtOpc) {
708 case TargetOpcode::G_ANYEXT:
709 CandidateLoadOpc = TargetOpcode::G_LOAD;
710 break;
711 case TargetOpcode::G_SEXT:
712 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
713 break;
714 case TargetOpcode::G_ZEXT:
715 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
716 break;
717 default:
718 llvm_unreachable("Unexpected extend opc");
719 }
720 return CandidateLoadOpc;
721}
722
724 MachineInstr &MI, PreferredTuple &Preferred) const {
725 // We match the loads and follow the uses to the extend instead of matching
726 // the extends and following the def to the load. This is because the load
727 // must remain in the same position for correctness (unless we also add code
728 // to find a safe place to sink it) whereas the extend is freely movable.
729 // It also prevents us from duplicating the load for the volatile case or just
730 // for performance.
731 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
732 if (!LoadMI)
733 return false;
734
735 Register LoadReg = LoadMI->getDstReg();
736
737 LLT LoadValueTy = MRI.getType(LoadReg);
738 if (!LoadValueTy.isScalar())
739 return false;
740
741 // Most architectures are going to legalize <s8 loads into at least a 1 byte
742 // load, and the MMOs can only describe memory accesses in multiples of bytes.
743 // If we try to perform extload combining on those, we can end up with
744 // %a(s8) = extload %ptr (load 1 byte from %ptr)
745 // ... which is an illegal extload instruction.
746 if (LoadValueTy.getSizeInBits() < 8)
747 return false;
748
749 // Non-power-of-2 types will very likely be legalized into multiple
750 // loads. Don't bother trying to match them into extending loads.
752 return false;
753
754 // Find the preferred type aside from the any-extends (unless it's the only
755 // one) and non-extending ops. We'll emit an extending load to that type and
756 // emit a variant of (extend (trunc X)) for the others according to the
757 // relative type sizes. At the same time, pick an extend to use based on the
758 // extend involved in the chosen type.
759 unsigned PreferredOpcode =
760 isa<GLoad>(&MI)
761 ? TargetOpcode::G_ANYEXT
762 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
763 Preferred = {LLT(), PreferredOpcode, nullptr};
764 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
765 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
766 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
767 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
768 const auto &MMO = LoadMI->getMMO();
769 // Don't do anything for atomics.
770 if (MMO.isAtomic())
771 continue;
772 // Check for legality.
773 if (!isPreLegalize()) {
774 LegalityQuery::MemDesc MMDesc(MMO);
775 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
776 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
777 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
778 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
779 .Action != LegalizeActions::Legal)
780 continue;
781 }
782 Preferred = ChoosePreferredUse(MI, Preferred,
783 MRI.getType(UseMI.getOperand(0).getReg()),
784 UseMI.getOpcode(), &UseMI);
785 }
786 }
787
788 // There were no extends
789 if (!Preferred.MI)
790 return false;
791 // It should be impossible to choose an extend without selecting a different
792 // type since by definition the result of an extend is larger.
793 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
794
795 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
796 return true;
797}
798
800 MachineInstr &MI, PreferredTuple &Preferred) const {
801 // Rewrite the load to the chosen extending load.
802 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
803
804 // Inserter to insert a truncate back to the original type at a given point
805 // with some basic CSE to limit truncate duplication to one per BB.
807 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
808 MachineBasicBlock::iterator InsertBefore,
809 MachineOperand &UseMO) {
810 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
811 if (PreviouslyEmitted) {
812 Observer.changingInstr(*UseMO.getParent());
813 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
814 Observer.changedInstr(*UseMO.getParent());
815 return;
816 }
817
818 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
819 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
820 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
821 EmittedInsns[InsertIntoBB] = NewMI;
822 replaceRegOpWith(MRI, UseMO, NewDstReg);
823 };
824
825 Observer.changingInstr(MI);
826 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
827 MI.setDesc(Builder.getTII().get(LoadOpc));
828
829 // Rewrite all the uses to fix up the types.
830 auto &LoadValue = MI.getOperand(0);
832 llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));
833
834 for (auto *UseMO : Uses) {
835 MachineInstr *UseMI = UseMO->getParent();
836
837 // If the extend is compatible with the preferred extend then we should fix
838 // up the type and extend so that it uses the preferred use.
839 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
840 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
841 Register UseDstReg = UseMI->getOperand(0).getReg();
842 MachineOperand &UseSrcMO = UseMI->getOperand(1);
843 const LLT UseDstTy = MRI.getType(UseDstReg);
844 if (UseDstReg != ChosenDstReg) {
845 if (Preferred.Ty == UseDstTy) {
846 // If the use has the same type as the preferred use, then merge
847 // the vregs and erase the extend. For example:
848 // %1:_(s8) = G_LOAD ...
849 // %2:_(s32) = G_SEXT %1(s8)
850 // %3:_(s32) = G_ANYEXT %1(s8)
851 // ... = ... %3(s32)
852 // rewrites to:
853 // %2:_(s32) = G_SEXTLOAD ...
854 // ... = ... %2(s32)
855 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
856 Observer.erasingInstr(*UseMO->getParent());
857 UseMO->getParent()->eraseFromParent();
858 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
859 // If the preferred size is smaller, then keep the extend but extend
860 // from the result of the extending load. For example:
861 // %1:_(s8) = G_LOAD ...
862 // %2:_(s32) = G_SEXT %1(s8)
863 // %3:_(s64) = G_ANYEXT %1(s8)
864 // ... = ... %3(s64)
865 // rewrites to:
866 // %2:_(s32) = G_SEXTLOAD ...
867 // %3:_(s64) = G_ANYEXT %2:_(s32)
868 // ... = ... %3(s64)
869 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
870 } else {
871 // If the preferred size is larger, then insert a truncate. For
872 // example:
873 // %1:_(s8) = G_LOAD ...
874 // %2:_(s64) = G_SEXT %1(s8)
875 // %3:_(s32) = G_ZEXT %1(s8)
876 // ... = ... %3(s32)
877 // rewrites to:
878 // %2:_(s64) = G_SEXTLOAD ...
879 // %4:_(s8) = G_TRUNC %2:_(s64)
880 // %3:_(s32) = G_ZEXT %4:_(s8)
881 // ... = ... %3(s32)
882 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
883 InsertTruncAt);
884 }
885 continue;
886 }
887 // The use is (one of) the uses of the preferred use we chose earlier.
888 // We're going to update the load to def this value later so just erase
889 // the old extend.
890 Observer.erasingInstr(*UseMO->getParent());
891 UseMO->getParent()->eraseFromParent();
892 continue;
893 }
894
895 // The use isn't an extend. Truncate back to the type we originally loaded.
896 // This is free on many targets.
897 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
898 }
899
900 MI.getOperand(0).setReg(ChosenDstReg);
901 Observer.changedInstr(MI);
902}
903
905 BuildFnTy &MatchInfo) const {
906 assert(MI.getOpcode() == TargetOpcode::G_AND);
907
908 // If we have the following code:
909 // %mask = G_CONSTANT 255
910 // %ld = G_LOAD %ptr, (load s16)
911 // %and = G_AND %ld, %mask
912 //
913 // Try to fold it into
914 // %ld = G_ZEXTLOAD %ptr, (load s8)
915
916 Register Dst = MI.getOperand(0).getReg();
917 if (MRI.getType(Dst).isVector())
918 return false;
919
920 auto MaybeMask =
921 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
922 if (!MaybeMask)
923 return false;
924
925 APInt MaskVal = MaybeMask->Value;
926
927 if (!MaskVal.isMask())
928 return false;
929
930 Register SrcReg = MI.getOperand(1).getReg();
931 // Don't use getOpcodeDef() here since intermediate instructions may have
932 // multiple users.
933 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
934 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
935 return false;
936
937 Register LoadReg = LoadMI->getDstReg();
938 LLT RegTy = MRI.getType(LoadReg);
939 Register PtrReg = LoadMI->getPointerReg();
940 unsigned RegSize = RegTy.getSizeInBits();
941 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
942 unsigned MaskSizeBits = MaskVal.countr_one();
943
944 // The mask may not be larger than the in-memory type, as it might cover sign
945 // extended bits
946 if (MaskSizeBits > LoadSizeBits.getValue())
947 return false;
948
949 // If the mask covers the whole destination register, there's nothing to
950 // extend
951 if (MaskSizeBits >= RegSize)
952 return false;
953
954 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
955 // at least byte loads. Avoid creating such loads here
956 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
957 return false;
958
959 const MachineMemOperand &MMO = LoadMI->getMMO();
960 LegalityQuery::MemDesc MemDesc(MMO);
961
962 // Don't modify the memory access size if this is atomic/volatile, but we can
963 // still adjust the opcode to indicate the high bit behavior.
964 if (LoadMI->isSimple())
965 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
966 else if (LoadSizeBits.getValue() > MaskSizeBits ||
967 LoadSizeBits.getValue() == RegSize)
968 return false;
969
970 // TODO: Could check if it's legal with the reduced or original memory size.
972 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
973 return false;
974
975 MatchInfo = [=](MachineIRBuilder &B) {
976 B.setInstrAndDebugLoc(*LoadMI);
977 auto &MF = B.getMF();
978 auto PtrInfo = MMO.getPointerInfo();
979 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
980 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
981 LoadMI->eraseFromParent();
982 };
983 return true;
984}
985
987 const MachineInstr &UseMI) const {
988 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
989 "shouldn't consider debug uses");
990 assert(DefMI.getParent() == UseMI.getParent());
991 if (&DefMI == &UseMI)
992 return true;
993 const MachineBasicBlock &MBB = *DefMI.getParent();
994 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
995 return &MI == &DefMI || &MI == &UseMI;
996 });
997 if (DefOrUse == MBB.end())
998 llvm_unreachable("Block must contain both DefMI and UseMI!");
999 return &*DefOrUse == &DefMI;
1000}
1001
1003 const MachineInstr &UseMI) const {
1004 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1005 "shouldn't consider debug uses");
1006 if (MDT)
1007 return MDT->dominates(&DefMI, &UseMI);
1008 else if (DefMI.getParent() != UseMI.getParent())
1009 return false;
1010
1011 return isPredecessor(DefMI, UseMI);
1012}
1013
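// Fold a G_SEXT_INREG that only redoes the sign extension a G_SEXTLOAD (or a
// G_TRUNC of one) has already performed. E.g.:
//   %ld:_(s32) = G_SEXTLOAD %ptr :: (load (s8))
//   %ext:_(s32) = G_SEXT_INREG %ld, 8
// Here %ext is already equal to %ld, so the G_SEXT_INREG can become a copy.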
1015 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1016 Register SrcReg = MI.getOperand(1).getReg();
1017 Register LoadUser = SrcReg;
1018
1019 if (MRI.getType(SrcReg).isVector())
1020 return false;
1021
1022 Register TruncSrc;
1023 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1024 LoadUser = TruncSrc;
1025
1026 uint64_t SizeInBits = MI.getOperand(2).getImm();
1027 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1028 // need any extend at all, just a truncate.
1029 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1030 // If truncating more than the original extended value, abort.
1031 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1032 if (TruncSrc &&
1033 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1034 return false;
1035 if (LoadSizeBits == SizeInBits)
1036 return true;
1037 }
1038 return false;
1039}
1040
1042 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1043 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1044 MI.eraseFromParent();
1045}
1046
1048 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1049 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1050
1051 Register DstReg = MI.getOperand(0).getReg();
1052 LLT RegTy = MRI.getType(DstReg);
1053
1054 // Only supports scalars for now.
1055 if (RegTy.isVector())
1056 return false;
1057
1058 Register SrcReg = MI.getOperand(1).getReg();
1059 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1060 if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
1061 return false;
1062
1063 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1064
1065 // If the sign extend extends from a narrower width than the load's width,
1066 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1067 // Avoid widening the load at all.
1068 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1069
1070 // Don't generate G_SEXTLOADs with a < 1 byte width.
1071 if (NewSizeBits < 8)
1072 return false;
1073 // Don't bother creating a non-power-of-2 sextload, it will likely be broken up
1074 // anyway for most targets.
1075 if (!isPowerOf2_32(NewSizeBits))
1076 return false;
1077
1078 const MachineMemOperand &MMO = LoadDef->getMMO();
1079 LegalityQuery::MemDesc MMDesc(MMO);
1080
1081 // Don't modify the memory access size if this is atomic/volatile, but we can
1082 // still adjust the opcode to indicate the high bit behavior.
1083 if (LoadDef->isSimple())
1084 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1085 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1086 return false;
1087
1088 // TODO: Could check if it's legal with the reduced or original memory size.
1089 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1090 {MRI.getType(LoadDef->getDstReg()),
1091 MRI.getType(LoadDef->getPointerReg())},
1092 {MMDesc}}))
1093 return false;
1094
1095 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1096 return true;
1097}
1098
1100 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1101 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1102 Register LoadReg;
1103 unsigned ScalarSizeBits;
1104 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1105 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1106
1107 // If we have the following:
1108 // %ld = G_LOAD %ptr, (load 2)
1109 // %ext = G_SEXT_INREG %ld, 8
1110 // ==>
1111 // %ld = G_SEXTLOAD %ptr (load 1)
1112
1113 auto &MMO = LoadDef->getMMO();
1114 Builder.setInstrAndDebugLoc(*LoadDef);
1115 auto &MF = Builder.getMF();
1116 auto PtrInfo = MMO.getPointerInfo();
1117 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1118 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1119 LoadDef->getPointerReg(), *NewMMO);
1120 MI.eraseFromParent();
1121
1122 // Not all loads can be deleted, so make sure the old one is removed.
1123 LoadDef->eraseFromParent();
1124}
1125
1126/// Return true if 'MI' is a load or a store that may fold its address
1127/// operand into the load / store addressing mode.
1131 auto *MF = MI->getMF();
1132 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1133 if (!Addr)
1134 return false;
1135
1136 AM.HasBaseReg = true;
1137 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1138 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1139 else
1140 AM.Scale = 1; // [reg +/- reg]
1141
1142 return TLI.isLegalAddressingMode(
1143 MF->getDataLayout(), AM,
1144 getTypeForLLT(MI->getMMO().getMemoryType(),
1145 MF->getFunction().getContext()),
1146 MI->getMMO().getAddrSpace());
1147}
1148
1149static unsigned getIndexedOpc(unsigned LdStOpc) {
1150 switch (LdStOpc) {
1151 case TargetOpcode::G_LOAD:
1152 return TargetOpcode::G_INDEXED_LOAD;
1153 case TargetOpcode::G_STORE:
1154 return TargetOpcode::G_INDEXED_STORE;
1155 case TargetOpcode::G_ZEXTLOAD:
1156 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1157 case TargetOpcode::G_SEXTLOAD:
1158 return TargetOpcode::G_INDEXED_SEXTLOAD;
1159 default:
1160 llvm_unreachable("Unexpected opcode");
1161 }
1162}
1163
1164bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1165 // Check for legality.
1166 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1167 LLT Ty = MRI.getType(LdSt.getReg(0));
1168 LLT MemTy = LdSt.getMMO().getMemoryType();
1170 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1172 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1173 SmallVector<LLT> OpTys;
1174 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1175 OpTys = {PtrTy, Ty, Ty};
1176 else
1177 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1178
1179 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1180 return isLegal(Q);
1181}
1182
1184 "post-index-use-threshold", cl::Hidden, cl::init(32),
1185 cl::desc("Number of uses of a base pointer to check before it is no longer "
1186 "considered for post-indexing."));
1187
1188bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1190 bool &RematOffset) const {
1191 // We're looking for the following pattern, for either load or store:
1192 // %baseptr:_(p0) = ...
1193 // G_STORE %val(s64), %baseptr(p0)
1194 // %offset:_(s64) = G_CONSTANT i64 -256
1195 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
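  // If a candidate is found, applyCombineIndexedLoadStore rewrites the pair
  // into a single post-indexed operation (e.g. G_INDEXED_STORE) that also
  // defines %new_addr, and erases the now-dead G_PTR_ADD.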
1196 const auto &TLI = getTargetLowering();
1197
1198 Register Ptr = LdSt.getPointerReg();
1199 // If the store is the only use, don't bother.
1200 if (MRI.hasOneNonDBGUse(Ptr))
1201 return false;
1202
1203 if (!isIndexedLoadStoreLegal(LdSt))
1204 return false;
1205
1206 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1207 return false;
1208
1209 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1210 auto *PtrDef = MRI.getVRegDef(Ptr);
1211
1212 unsigned NumUsesChecked = 0;
1213 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1214 if (++NumUsesChecked > PostIndexUseThreshold)
1215 return false; // Try to avoid exploding compile time.
1216
1217 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1218 // The use itself might be dead. This can happen during combines if DCE
1219 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1220 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1221 continue;
1222
1223 // Check the user of this isn't the store, otherwise we'd generate an
1224 // indexed store defining its own use.
1225 if (StoredValDef == &Use)
1226 continue;
1227
1228 Offset = PtrAdd->getOffsetReg();
1229 if (!ForceLegalIndexing &&
1230 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1231 /*IsPre*/ false, MRI))
1232 continue;
1233
1234 // Make sure the offset calculation is before the potentially indexed op.
1235 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1236 RematOffset = false;
1237 if (!dominates(*OffsetDef, LdSt)) {
1238 // If the offset however is just a G_CONSTANT, we can always just
1239 // rematerialize it where we need it.
1240 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1241 continue;
1242 RematOffset = true;
1243 }
1244
1245 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1246 if (&BasePtrUse == PtrDef)
1247 continue;
1248
1249 // If the user is a later load/store that can be post-indexed, then don't
1250 // combine this one.
1251 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1252 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1253 dominates(LdSt, *BasePtrLdSt) &&
1254 isIndexedLoadStoreLegal(*BasePtrLdSt))
1255 return false;
1256
1257 // Now we're looking for the key G_PTR_ADD instruction, which contains
1258 // the offset add that we want to fold.
1259 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1260 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1261 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1262 // If the use is in a different block, then we may produce worse code
1263 // due to the extra register pressure.
1264 if (BaseUseUse.getParent() != LdSt.getParent())
1265 return false;
1266
1267 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1268 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1269 return false;
1270 }
1271 if (!dominates(LdSt, BasePtrUse))
1272 return false; // All uses must be dominated by the load/store.
1273 }
1274 }
1275
1276 Addr = PtrAdd->getReg(0);
1277 Base = PtrAdd->getBaseReg();
1278 return true;
1279 }
1280
1281 return false;
1282}
1283
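// Look for a pre-indexed candidate, i.e. a load/store whose pointer is itself
// a G_PTR_ADD of a base and an offset:
//   %addr:_(p0) = G_PTR_ADD %base, %offset
//   G_STORE %val, %addr(p0)   ; or a G_LOAD from %addr
// so that the address computation can be folded into the memory operation.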
1284bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1285 Register &Base,
1286 Register &Offset) const {
1287 auto &MF = *LdSt.getParent()->getParent();
1288 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1289
1290 Addr = LdSt.getPointerReg();
1291 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1292 MRI.hasOneNonDBGUse(Addr))
1293 return false;
1294
1295 if (!ForceLegalIndexing &&
1296 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1297 return false;
1298
1299 if (!isIndexedLoadStoreLegal(LdSt))
1300 return false;
1301
1302 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1303 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1304 return false;
1305
1306 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1307 // Would require a copy.
1308 if (Base == St->getValueReg())
1309 return false;
1310
1311 // We're expecting one use of Addr in MI, but it could also be the
1312 // value stored, which isn't actually dominated by the instruction.
1313 if (St->getValueReg() == Addr)
1314 return false;
1315 }
1316
1317 // Avoid increasing cross-block register pressure.
1318 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1319 if (AddrUse.getParent() != LdSt.getParent())
1320 return false;
1321
1322 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1323 // That might allow us to end base's liveness here by adjusting the constant.
1324 bool RealUse = false;
1325 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1326 if (!dominates(LdSt, AddrUse))
1327 return false; // All uses must be dominated by the load/store.
1328
1329 // If Ptr may be folded into the addressing mode of another use, then it's
1330 // not profitable to do this transformation.
1331 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1332 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1333 RealUse = true;
1334 } else {
1335 RealUse = true;
1336 }
1337 }
1338 return RealUse;
1339}
1340
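// Narrow a G_EXTRACT_VECTOR_ELT of a single-use, simple vector G_LOAD into a
// scalar load of just the selected element. E.g.:
//   %v:_(<4 x s32>) = G_LOAD %ptr :: (load (<4 x s32>))
//   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v, %idx
// becomes a single s32 G_LOAD from the address of the indexed element.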
1342 MachineInstr &MI, BuildFnTy &MatchInfo) const {
1343 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1344
1345 // Check if there is a load that defines the vector being extracted from.
1346 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1347 if (!LoadMI)
1348 return false;
1349
1350 Register Vector = MI.getOperand(1).getReg();
1351 LLT VecEltTy = MRI.getType(Vector).getElementType();
1352
1353 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1354
1355 // Checking whether we should reduce the load width.
1356 if (!MRI.hasOneNonDBGUse(Vector))
1357 return false;
1358
1359 // Check if the defining load is simple.
1360 if (!LoadMI->isSimple())
1361 return false;
1362
1363 // If the vector element type is not a multiple of a byte then we are unable
1364 // to correctly compute an address to load only the extracted element as a
1365 // scalar.
1366 if (!VecEltTy.isByteSized())
1367 return false;
1368
1369 // Check for load fold barriers between the extraction and the load.
1370 if (MI.getParent() != LoadMI->getParent())
1371 return false;
1372 const unsigned MaxIter = 20;
1373 unsigned Iter = 0;
1374 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1375 if (II->isLoadFoldBarrier())
1376 return false;
1377 if (Iter++ == MaxIter)
1378 return false;
1379 }
1380
1381 // Check if the new load that we are going to create is legal
1382 // if we are in the post-legalization phase.
1383 MachineMemOperand MMO = LoadMI->getMMO();
1384 Align Alignment = MMO.getAlign();
1385 MachinePointerInfo PtrInfo;
1387
1388 // Find the appropriate PtrInfo if the offset is a known constant.
1389 // This is required to create the memory operand for the narrowed load.
1390 // This machine memory operand object helps us determine legality
1391 // before we proceed to combine the instruction.
1392 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1393 int Elt = CVal->getZExtValue();
1394 // FIXME: should be (ABI size)*Elt.
1395 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1396 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1397 } else {
1398 // Discard the pointer info except the address space because the memory
1399 // operand can't represent this new access since the offset is variable.
1400 Offset = VecEltTy.getSizeInBits() / 8;
1402 }
1403
1404 Alignment = commonAlignment(Alignment, Offset);
1405
1406 Register VecPtr = LoadMI->getPointerReg();
1407 LLT PtrTy = MRI.getType(VecPtr);
1408
1409 MachineFunction &MF = *MI.getMF();
1410 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1411
1412 LegalityQuery::MemDesc MMDesc(*NewMMO);
1413
1415 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1416 return false;
1417
1418 // Load must be allowed and fast on the target.
1420 auto &DL = MF.getDataLayout();
1421 unsigned Fast = 0;
1422 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1423 &Fast) ||
1424 !Fast)
1425 return false;
1426
1427 Register Result = MI.getOperand(0).getReg();
1428 Register Index = MI.getOperand(2).getReg();
1429
1430 MatchInfo = [=](MachineIRBuilder &B) {
1431 GISelObserverWrapper DummyObserver;
1432 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1433 // Get pointer to the vector element.
1434 Register finalPtr = Helper.getVectorElementPointer(
1435 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1436 Index);
1437 // New G_LOAD instruction.
1438 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1439 // Remove the original G_LOAD instruction.
1440 LoadMI->eraseFromParent();
1441 };
1442
1443 return true;
1444}
1445
1447 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1448 auto &LdSt = cast<GLoadStore>(MI);
1449
1450 if (LdSt.isAtomic())
1451 return false;
1452
1453 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1454 MatchInfo.Offset);
1455 if (!MatchInfo.IsPre &&
1456 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1457 MatchInfo.Offset, MatchInfo.RematOffset))
1458 return false;
1459
1460 return true;
1461}
1462
1464 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1465 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1466 unsigned Opcode = MI.getOpcode();
1467 bool IsStore = Opcode == TargetOpcode::G_STORE;
1468 unsigned NewOpcode = getIndexedOpc(Opcode);
1469
1470 // If the offset constant didn't happen to dominate the load/store, we can
1471 // just clone it as needed.
1472 if (MatchInfo.RematOffset) {
1473 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1474 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1475 *OldCst->getOperand(1).getCImm());
1476 MatchInfo.Offset = NewCst.getReg(0);
1477 }
1478
1479 auto MIB = Builder.buildInstr(NewOpcode);
1480 if (IsStore) {
1481 MIB.addDef(MatchInfo.Addr);
1482 MIB.addUse(MI.getOperand(0).getReg());
1483 } else {
1484 MIB.addDef(MI.getOperand(0).getReg());
1485 MIB.addDef(MatchInfo.Addr);
1486 }
1487
1488 MIB.addUse(MatchInfo.Base);
1489 MIB.addUse(MatchInfo.Offset);
1490 MIB.addImm(MatchInfo.IsPre);
1491 MIB->cloneMemRefs(*MI.getMF(), MI);
1492 MI.eraseFromParent();
1493 AddrDef.eraseFromParent();
1494
1495 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1496}
1497
1499 MachineInstr *&OtherMI) const {
1500 unsigned Opcode = MI.getOpcode();
1501 bool IsDiv, IsSigned;
1502
1503 switch (Opcode) {
1504 default:
1505 llvm_unreachable("Unexpected opcode!");
1506 case TargetOpcode::G_SDIV:
1507 case TargetOpcode::G_UDIV: {
1508 IsDiv = true;
1509 IsSigned = Opcode == TargetOpcode::G_SDIV;
1510 break;
1511 }
1512 case TargetOpcode::G_SREM:
1513 case TargetOpcode::G_UREM: {
1514 IsDiv = false;
1515 IsSigned = Opcode == TargetOpcode::G_SREM;
1516 break;
1517 }
1518 }
1519
1520 Register Src1 = MI.getOperand(1).getReg();
1521 unsigned DivOpcode, RemOpcode, DivremOpcode;
1522 if (IsSigned) {
1523 DivOpcode = TargetOpcode::G_SDIV;
1524 RemOpcode = TargetOpcode::G_SREM;
1525 DivremOpcode = TargetOpcode::G_SDIVREM;
1526 } else {
1527 DivOpcode = TargetOpcode::G_UDIV;
1528 RemOpcode = TargetOpcode::G_UREM;
1529 DivremOpcode = TargetOpcode::G_UDIVREM;
1530 }
1531
1532 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1533 return false;
1534
1535 // Combine:
1536 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1537 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1538 // into:
1539 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1540
1541 // Combine:
1542 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1543 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1544 // into:
1545 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1546
1547 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1548 if (MI.getParent() == UseMI.getParent() &&
1549 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1550 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1551 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1552 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1553 OtherMI = &UseMI;
1554 return true;
1555 }
1556 }
1557
1558 return false;
1559}
1560
1562 MachineInstr *&OtherMI) const {
1563 unsigned Opcode = MI.getOpcode();
1564 assert(OtherMI && "OtherMI shouldn't be empty.");
1565
1566 Register DestDivReg, DestRemReg;
1567 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1568 DestDivReg = MI.getOperand(0).getReg();
1569 DestRemReg = OtherMI->getOperand(0).getReg();
1570 } else {
1571 DestDivReg = OtherMI->getOperand(0).getReg();
1572 DestRemReg = MI.getOperand(0).getReg();
1573 }
1574
1575 bool IsSigned =
1576 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1577
1578 // Check which instruction is first in the block so we don't break def-use
1579 // deps by "moving" the instruction incorrectly. Also keep track of which
1580 // instruction is first so we pick its operands, avoiding use-before-def
1581 // bugs.
1582 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1583 Builder.setInstrAndDebugLoc(*FirstInst);
1584
1585 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1586 : TargetOpcode::G_UDIVREM,
1587 {DestDivReg, DestRemReg},
1588 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1589 MI.eraseFromParent();
1590 OtherMI->eraseFromParent();
1591}
1592
1594 MachineInstr &MI, MachineInstr *&BrCond) const {
1595 assert(MI.getOpcode() == TargetOpcode::G_BR);
1596
1597 // Try to match the following:
1598 // bb1:
1599 // G_BRCOND %c1, %bb2
1600 // G_BR %bb3
1601 // bb2:
1602 // ...
1603 // bb3:
1604
1605 // The above pattern does not have a fall through to the successor bb2, always
1606 // resulting in a branch no matter which path is taken. Here we try to find
1607 // and replace that pattern with a conditional branch to bb3 and otherwise
1608 // fall through to bb2. This is generally better for branch predictors.
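  // That is, we invert %c1 (by XOR-ing it with the target's "true" value),
  // branch conditionally to bb3 on the inverted condition, and retarget the
  // unconditional G_BR at bb2 so it can later be elided as a fallthrough.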
1609
1610 MachineBasicBlock *MBB = MI.getParent();
1612 if (BrIt == MBB->begin())
1613 return false;
1614 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1615
1616 BrCond = &*std::prev(BrIt);
1617 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1618 return false;
1619
1620 // Check that the next block is the conditional branch target. Also make sure
1621 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1622 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1623 return BrCondTarget != MI.getOperand(0).getMBB() &&
1624 MBB->isLayoutSuccessor(BrCondTarget);
1625}
1626
1628 MachineInstr &MI, MachineInstr *&BrCond) const {
1629 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1630 Builder.setInstrAndDebugLoc(*BrCond);
1631 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1632 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1633 // this to i1 only since we might not know for sure what kind of
1634 // compare generated the condition value.
1635 auto True = Builder.buildConstant(
1636 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1637 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1638
1639 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1640 Observer.changingInstr(MI);
1641 MI.getOperand(0).setMBB(FallthroughBB);
1642 Observer.changedInstr(MI);
1643
1644 // Change the conditional branch to use the inverted condition and
1645 // new target block.
1646 Observer.changingInstr(*BrCond);
1647 BrCond->getOperand(0).setReg(Xor.getReg(0));
1648 BrCond->getOperand(1).setMBB(BrTarget);
1649 Observer.changedInstr(*BrCond);
1650}
1651
1653 MachineIRBuilder HelperBuilder(MI);
1654 GISelObserverWrapper DummyObserver;
1655 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1656 return Helper.lowerMemcpyInline(MI) ==
1658}
1659
1661 unsigned MaxLen) const {
1662 MachineIRBuilder HelperBuilder(MI);
1663 GISelObserverWrapper DummyObserver;
1664 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1665 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1667}
1668
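// Constant fold a single-operand floating point opcode (G_FNEG, G_FABS,
// G_FPEXT, G_FPTRUNC, G_FSQRT or G_FLOG2) applied to the constant value Val.
// E.g. G_FNEG of +2.0 folds to -2.0.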
1670 const MachineRegisterInfo &MRI,
1671 const APFloat &Val) {
1672 APFloat Result(Val);
1673 switch (MI.getOpcode()) {
1674 default:
1675 llvm_unreachable("Unexpected opcode!");
1676 case TargetOpcode::G_FNEG: {
1677 Result.changeSign();
1678 return Result;
1679 }
1680 case TargetOpcode::G_FABS: {
1681 Result.clearSign();
1682 return Result;
1683 }
1684 case TargetOpcode::G_FPEXT:
1685 case TargetOpcode::G_FPTRUNC: {
1686 bool Unused;
1687 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1689 &Unused);
1690 return Result;
1691 }
1692 case TargetOpcode::G_FSQRT: {
1693 bool Unused;
1695 &Unused);
1696 Result = APFloat(sqrt(Result.convertToDouble()));
1697 break;
1698 }
1699 case TargetOpcode::G_FLOG2: {
1700 bool Unused;
1702 &Unused);
1703 Result = APFloat(log2(Result.convertToDouble()));
1704 break;
1705 }
1706 }
1707 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1708 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
1709 // `G_FLOG2` reach here.
1710 bool Unused;
1711 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1712 return Result;
1713}
1714
1716 MachineInstr &MI, const ConstantFP *Cst) const {
1717 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1718 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1719 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1720 MI.eraseFromParent();
1721}
1722
1724 PtrAddChain &MatchInfo) const {
1725 // We're trying to match the following pattern:
1726 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1727 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1728 // -->
1729 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
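// For illustration (register names and immediates chosen arbitrarily, in the
// notation of the comment above):
//   %t1 = G_PTR_ADD %base, G_CONSTANT 16
//   %root = G_PTR_ADD %t1, G_CONSTANT 8
// folds to
//   %root = G_PTR_ADD %base, G_CONSTANT 24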
1730
1731 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1732 return false;
1733
1734 Register Add2 = MI.getOperand(1).getReg();
1735 Register Imm1 = MI.getOperand(2).getReg();
1736 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1737 if (!MaybeImmVal)
1738 return false;
1739
1740 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1741 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1742 return false;
1743
1744 Register Base = Add2Def->getOperand(1).getReg();
1745 Register Imm2 = Add2Def->getOperand(2).getReg();
1746 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1747 if (!MaybeImm2Val)
1748 return false;
1749
1750 // Check if the new combined immediate forms an illegal addressing mode.
1751 // Do not combine if the addressing mode was legal before but would become illegal.
1752 // To do so, we need to find a load/store user of the pointer to get
1753 // the access type.
1754 Type *AccessTy = nullptr;
1755 auto &MF = *MI.getMF();
1756 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1757 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1758 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1759 MF.getFunction().getContext());
1760 break;
1761 }
1762 }
1763 TargetLoweringBase::AddrMode AMNew;
1764 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1765 AMNew.BaseOffs = CombinedImm.getSExtValue();
1766 if (AccessTy) {
1767 AMNew.HasBaseReg = true;
1768 TargetLoweringBase::AddrMode AMOld;
1769 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1770 AMOld.HasBaseReg = true;
1771 unsigned AS = MRI.getType(Add2).getAddressSpace();
1772 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1773 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1774 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1775 return false;
1776 }
1777
1778 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1779 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1780 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1781 // largest signed integer that fits into the index type, which is the maximum
1782 // size of allocated objects according to the IR Language Reference.
1783 unsigned PtrAddFlags = MI.getFlags();
1784 unsigned LHSPtrAddFlags = Add2Def->getFlags();
1785 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1786 bool IsInBounds =
1787 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1788 unsigned Flags = 0;
1789 if (IsNoUWrap)
1790 Flags |= MachineInstr::MIFlag::NoUWrap;
1791 if (IsInBounds) {
1792 Flags |= MachineInstr::MIFlag::NoUSWrap;
1793 Flags |= MachineInstr::MIFlag::InBounds;
1794 }
1795
1796 // Pass the combined immediate to the apply function.
1797 MatchInfo.Imm = AMNew.BaseOffs;
1798 MatchInfo.Base = Base;
1799 MatchInfo.Bank = getRegBank(Imm2);
1800 MatchInfo.Flags = Flags;
1801 return true;
1802}
1803
1805 PtrAddChain &MatchInfo) const {
1806 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1807 MachineIRBuilder MIB(MI);
1808 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1809 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1810 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1811 Observer.changingInstr(MI);
1812 MI.getOperand(1).setReg(MatchInfo.Base);
1813 MI.getOperand(2).setReg(NewOffset.getReg(0));
1814 MI.setFlags(MatchInfo.Flags);
1815 Observer.changedInstr(MI);
1816}
1817
1819 RegisterImmPair &MatchInfo) const {
1820 // We're trying to match the following pattern with any of
1821 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1822 // %t1 = SHIFT %base, G_CONSTANT imm1
1823 // %root = SHIFT %t1, G_CONSTANT imm2
1824 // -->
1825 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
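// For illustration, with G_SHL on an s32 value (constants chosen arbitrarily):
//   %t1 = G_SHL %base, G_CONSTANT 2
//   %root = G_SHL %t1, G_CONSTANT 3
// folds to
//   %root = G_SHL %base, G_CONSTANT 5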
1826
1827 unsigned Opcode = MI.getOpcode();
1828 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1829 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1830 Opcode == TargetOpcode::G_USHLSAT) &&
1831 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1832
1833 Register Shl2 = MI.getOperand(1).getReg();
1834 Register Imm1 = MI.getOperand(2).getReg();
1835 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1836 if (!MaybeImmVal)
1837 return false;
1838
1839 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1840 if (Shl2Def->getOpcode() != Opcode)
1841 return false;
1842
1843 Register Base = Shl2Def->getOperand(1).getReg();
1844 Register Imm2 = Shl2Def->getOperand(2).getReg();
1845 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1846 if (!MaybeImm2Val)
1847 return false;
1848
1849 // Pass the combined immediate to the apply function.
1850 MatchInfo.Imm =
1851 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1852 MatchInfo.Reg = Base;
1853
1854 // There is no simple replacement for a saturating unsigned left shift that
1855 // exceeds the scalar size.
1856 if (Opcode == TargetOpcode::G_USHLSAT &&
1857 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1858 return false;
1859
1860 return true;
1861}
1862
1864 RegisterImmPair &MatchInfo) const {
1865 unsigned Opcode = MI.getOpcode();
1866 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1867 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1868 Opcode == TargetOpcode::G_USHLSAT) &&
1869 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1870
1871 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1872 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1873 auto Imm = MatchInfo.Imm;
1874
1875 if (Imm >= ScalarSizeInBits) {
1876 // Any logical shift that exceeds scalar size will produce zero.
1877 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1878 Builder.buildConstant(MI.getOperand(0), 0);
1879 MI.eraseFromParent();
1880 return;
1881 }
1882 // Arithmetic shift and saturating signed left shift have no effect beyond
1883 // scalar size.
1884 Imm = ScalarSizeInBits - 1;
1885 }
1886
1887 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1888 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1889 Observer.changingInstr(MI);
1890 MI.getOperand(1).setReg(MatchInfo.Reg);
1891 MI.getOperand(2).setReg(NewImm);
1892 Observer.changedInstr(MI);
1893}
1894
1896 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1897 // We're trying to match the following pattern with any of
1898 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1899 // with any of G_AND/G_OR/G_XOR logic instructions.
1900 // %t1 = SHIFT %X, G_CONSTANT C0
1901 // %t2 = LOGIC %t1, %Y
1902 // %root = SHIFT %t2, G_CONSTANT C1
1903 // -->
1904 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1905 // %t4 = SHIFT %Y, G_CONSTANT C1
1906 // %root = LOGIC %t3, %t4
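// For illustration, with G_SHL and G_AND on s32 values (constants arbitrary):
//   %t1 = G_SHL %X, G_CONSTANT 4
//   %t2 = G_AND %t1, %Y
//   %root = G_SHL %t2, G_CONSTANT 8
// becomes
//   %t3 = G_SHL %X, G_CONSTANT 12
//   %t4 = G_SHL %Y, G_CONSTANT 8
//   %root = G_AND %t3, %t4
// since shifting the AND distributes over both operands.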
1907 unsigned ShiftOpcode = MI.getOpcode();
1908 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1909 ShiftOpcode == TargetOpcode::G_ASHR ||
1910 ShiftOpcode == TargetOpcode::G_LSHR ||
1911 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1912 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1913 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1914
1915 // Match a one-use bitwise logic op.
1916 Register LogicDest = MI.getOperand(1).getReg();
1917 if (!MRI.hasOneNonDBGUse(LogicDest))
1918 return false;
1919
1920 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1921 unsigned LogicOpcode = LogicMI->getOpcode();
1922 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1923 LogicOpcode != TargetOpcode::G_XOR)
1924 return false;
1925
1926 // Find a matching one-use shift by constant.
1927 const Register C1 = MI.getOperand(2).getReg();
1928 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1929 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1930 return false;
1931
1932 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1933
1934 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1935 // The shift should match the previous one and have only one use.
1936 if (MI->getOpcode() != ShiftOpcode ||
1937 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1938 return false;
1939
1940 // Must be a constant.
1941 auto MaybeImmVal =
1942 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1943 if (!MaybeImmVal)
1944 return false;
1945
1946 ShiftVal = MaybeImmVal->Value.getSExtValue();
1947 return true;
1948 };
1949
1950 // Logic ops are commutative, so check each operand for a match.
1951 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1952 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1953 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1954 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1955 uint64_t C0Val;
1956
1957 if (matchFirstShift(LogicMIOp1, C0Val)) {
1958 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1959 MatchInfo.Shift2 = LogicMIOp1;
1960 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1961 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1962 MatchInfo.Shift2 = LogicMIOp2;
1963 } else
1964 return false;
1965
1966 MatchInfo.ValSum = C0Val + C1Val;
1967
1968 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1969 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1970 return false;
1971
1972 MatchInfo.Logic = LogicMI;
1973 return true;
1974}
1975
1977 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1978 unsigned Opcode = MI.getOpcode();
1979 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1980 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1981 Opcode == TargetOpcode::G_SSHLSAT) &&
1982 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1983
1984 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1985 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1986
1987 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1988
1989 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1990 Register Shift1 =
1991 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1992
1993 // If LogicNonShiftReg is the same as Shift1Base and the shift1 constant is
1994 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
1995 // shift1 when building shift2. Erasing MatchInfo.Shift2 at the end would then
1996 // actually remove the old shift1 and cause a crash later, so erase it earlier
1997 // to avoid the crash.
1998 MatchInfo.Shift2->eraseFromParent();
1999
2000 Register Shift2Const = MI.getOperand(2).getReg();
2001 Register Shift2 = Builder
2002 .buildInstr(Opcode, {DestType},
2003 {MatchInfo.LogicNonShiftReg, Shift2Const})
2004 .getReg(0);
2005
2006 Register Dest = MI.getOperand(0).getReg();
2007 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2008
2009 // This was one use so it's safe to remove it.
2010 MatchInfo.Logic->eraseFromParent();
2011
2012 MI.eraseFromParent();
2013}
2014
2016 BuildFnTy &MatchInfo) const {
2017 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2018 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2019 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
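// For illustration (s32, constants arbitrary): (shl (add %x, 3), 4) becomes
// (add (shl %x, 4), 48), because 3 << 4 == 48.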
2020 auto &Shl = cast<GenericMachineInstr>(MI);
2021 Register DstReg = Shl.getReg(0);
2022 Register SrcReg = Shl.getReg(1);
2023 Register ShiftReg = Shl.getReg(2);
2024 Register X, C1;
2025
2026 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2027 return false;
2028
2029 if (!mi_match(SrcReg, MRI,
2030 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
2031 m_GOr(m_Reg(X), m_Reg(C1))))))
2032 return false;
2033
2034 APInt C1Val, C2Val;
2035 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2036 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2037 return false;
2038
2039 auto *SrcDef = MRI.getVRegDef(SrcReg);
2040 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2041 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2042 LLT SrcTy = MRI.getType(SrcReg);
2043 MatchInfo = [=](MachineIRBuilder &B) {
2044 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2045 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2046 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2047 };
2048 return true;
2049}
2050
2052 LshrOfTruncOfLshr &MatchInfo,
2053 MachineInstr &ShiftMI) const {
2054 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2055
2056 Register N0 = MI.getOperand(1).getReg();
2057 Register N1 = MI.getOperand(2).getReg();
2058 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2059
2060 APInt N1C, N001C;
2061 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2062 return false;
2063 auto N001 = ShiftMI.getOperand(2).getReg();
2064 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2065 return false;
2066
2067 if (N001C.getBitWidth() > N1C.getBitWidth())
2068 N1C = N1C.zext(N001C.getBitWidth());
2069 else
2070 N001C = N001C.zext(N1C.getBitWidth());
2071
2072 Register InnerShift = ShiftMI.getOperand(0).getReg();
2073 LLT InnerShiftTy = MRI.getType(InnerShift);
2074 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2075 if ((N1C + N001C).ult(InnerShiftSize)) {
2076 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2077 MatchInfo.ShiftAmt = N1C + N001C;
2078 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2079 MatchInfo.InnerShiftTy = InnerShiftTy;
2080
2081 if ((N001C + OpSizeInBits) == InnerShiftSize)
2082 return true;
2083 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2084 MatchInfo.Mask = true;
2085 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2086 return true;
2087 }
2088 }
2089 return false;
2090}
2091
2093 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
2094 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2095
2096 Register Dst = MI.getOperand(0).getReg();
2097 auto ShiftAmt =
2098 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2099 auto Shift =
2100 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2101 if (MatchInfo.Mask == true) {
2102 APInt MaskVal =
2104 MatchInfo.MaskVal.getZExtValue());
2105 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2106 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2107 Builder.buildTrunc(Dst, And);
2108 } else
2109 Builder.buildTrunc(Dst, Shift);
2110 MI.eraseFromParent();
2111}
2112
2114 unsigned &ShiftVal) const {
2115 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2116 auto MaybeImmVal =
2117 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2118 if (!MaybeImmVal)
2119 return false;
2120
2121 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2122 return (static_cast<int32_t>(ShiftVal) != -1);
2123}
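// For illustration (constant chosen arbitrarily): G_MUL %x, 8 has a
// power-of-two right-hand side, so the match succeeds with
// ShiftVal == exactLogBase2(8) == 3 and the apply below rewrites it as
// G_SHL %x, 3.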
2124
2126 unsigned &ShiftVal) const {
2127 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2128 MachineIRBuilder MIB(MI);
2129 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2130 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2131 Observer.changingInstr(MI);
2132 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2133 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2134 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2136 Observer.changedInstr(MI);
2137}
2138
2140 BuildFnTy &MatchInfo) const {
2141 GSub &Sub = cast<GSub>(MI);
2142
2143 LLT Ty = MRI.getType(Sub.getReg(0));
2144
2145 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2146 return false;
2147
2149 return false;
2150
2151 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2152
2153 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2154 auto NegCst = B.buildConstant(Ty, -Imm);
2155 Observer.changingInstr(MI);
2156 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2157 MI.getOperand(2).setReg(NegCst.getReg(0));
2159 if (Imm.isMinSignedValue())
2161 Observer.changedInstr(MI);
2162 };
2163 return true;
2164}
2165
2166// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
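// For illustration (made-up types and constants): if %x:_(s8) is known to have
// at least two leading zero bits, then
//   %e:_(s32) = G_ZEXT %x
//   %root:_(s32) = G_SHL %e, 2
// can be narrowed to
//   %s:_(s8) = G_SHL %x, 2
//   %root:_(s32) = G_ZEXT %s
// because the shift cannot move set bits out of the narrow type.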
2168 RegisterImmPair &MatchData) const {
2169 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2170 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2171 return false;
2172
2173 Register LHS = MI.getOperand(1).getReg();
2174
2175 Register ExtSrc;
2176 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2177 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2178 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2179 return false;
2180
2181 Register RHS = MI.getOperand(2).getReg();
2182 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2183 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2184 if (!MaybeShiftAmtVal)
2185 return false;
2186
2187 if (LI) {
2188 LLT SrcTy = MRI.getType(ExtSrc);
2189
2190 // We only really care about the legality of the shifted value. We can
2191 // pick any type for the constant shift amount, so ask the target what to
2192 // use. Otherwise we would have to guess and hope it is reported as legal.
2193 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2194 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2195 return false;
2196 }
2197
2198 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2199 MatchData.Reg = ExtSrc;
2200 MatchData.Imm = ShiftAmt;
2201
2202 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2203 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2204 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2205}
2206
2208 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2209 Register ExtSrcReg = MatchData.Reg;
2210 int64_t ShiftAmtVal = MatchData.Imm;
2211
2212 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2213 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2214 auto NarrowShift =
2215 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2216 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2217 MI.eraseFromParent();
2218}
2219
2221 Register &MatchInfo) const {
2222 GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(MI);
2223 SmallVector<Register, 16> MergedValues;
2224 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2225 MergedValues.emplace_back(Merge.getSourceReg(I));
2226
2227 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2228 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2229 return false;
2230
2231 for (unsigned I = 0; I < MergedValues.size(); ++I)
2232 if (MergedValues[I] != Unmerge->getReg(I))
2233 return false;
2234
2235 MatchInfo = Unmerge->getSourceReg();
2236 return true;
2237}
2238
2239 static Register peekThroughBitcast(Register Reg,
2240 const MachineRegisterInfo &MRI) {
2241 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2242 ;
2243
2244 return Reg;
2245}
2246
2248 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2249 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2250 "Expected an unmerge");
2251 auto &Unmerge = cast<GUnmerge>(MI);
2252 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2253
2254 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2255 if (!SrcInstr)
2256 return false;
2257
2258 // Check the source type of the merge.
2259 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2260 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2261 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2262 if (SrcMergeTy != Dst0Ty && !SameSize)
2263 return false;
2264 // They are the same now (modulo a bitcast).
2265 // We can collect all the src registers.
2266 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2267 Operands.push_back(SrcInstr->getSourceReg(Idx));
2268 return true;
2269}
2270
2272 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2273 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2274 "Expected an unmerge");
2275 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2276 "Not enough operands to replace all defs");
2277 unsigned NumElems = MI.getNumOperands() - 1;
2278
2279 LLT SrcTy = MRI.getType(Operands[0]);
2280 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2281 bool CanReuseInputDirectly = DstTy == SrcTy;
2282 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2283 Register DstReg = MI.getOperand(Idx).getReg();
2284 Register SrcReg = Operands[Idx];
2285
2286 // This combine may run after RegBankSelect, so we need to be aware of
2287 // register banks.
2288 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2289 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2290 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2291 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2292 }
2293
2294 if (CanReuseInputDirectly)
2295 replaceRegWith(MRI, DstReg, SrcReg);
2296 else
2297 Builder.buildCast(DstReg, SrcReg);
2298 }
2299 MI.eraseFromParent();
2300}
2301
2303 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2304 unsigned SrcIdx = MI.getNumOperands() - 1;
2305 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2306 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2307 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2308 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2309 return false;
2310 // Break down the big constant into smaller ones.
2311 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2312 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2313 ? CstVal.getCImm()->getValue()
2314 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2315
2316 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2317 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2318 // Unmerge a constant.
2319 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2320 Csts.emplace_back(Val.trunc(ShiftAmt));
2321 Val = Val.lshr(ShiftAmt);
2322 }
2323
2324 return true;
2325}
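// For illustration of the constant unmerge above (constant arbitrary):
// unmerging the s64 constant 0x1122334455667788 into two s32 pieces yields
// 0x55667788 for the first def (the low half) and 0x11223344 for the second,
// matching G_UNMERGE_VALUES' least-significant-first ordering.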
2326
2328 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2329 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2330 "Expected an unmerge");
2331 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2332 "Not enough operands to replace all defs");
2333 unsigned NumElems = MI.getNumOperands() - 1;
2334 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2335 Register DstReg = MI.getOperand(Idx).getReg();
2336 Builder.buildConstant(DstReg, Csts[Idx]);
2337 }
2338
2339 MI.eraseFromParent();
2340}
2341
2344 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2345 unsigned SrcIdx = MI.getNumOperands() - 1;
2346 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2347 MatchInfo = [&MI](MachineIRBuilder &B) {
2348 unsigned NumElems = MI.getNumOperands() - 1;
2349 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2350 Register DstReg = MI.getOperand(Idx).getReg();
2351 B.buildUndef(DstReg);
2352 }
2353 };
2354 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2355}
2356
2358 MachineInstr &MI) const {
2359 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2360 "Expected an unmerge");
2361 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2362 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2363 return false;
2364 // Check that all the lanes are dead except the first one.
2365 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2366 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2367 return false;
2368 }
2369 return true;
2370}
2371
2373 MachineInstr &MI) const {
2374 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2375 Register Dst0Reg = MI.getOperand(0).getReg();
2376 Builder.buildTrunc(Dst0Reg, SrcReg);
2377 MI.eraseFromParent();
2378}
2379
2381 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2382 "Expected an unmerge");
2383 Register Dst0Reg = MI.getOperand(0).getReg();
2384 LLT Dst0Ty = MRI.getType(Dst0Reg);
2385 // G_ZEXT on vector applies to each lane, so it will
2386 // affect all destinations. Therefore we won't be able
2387 // to simplify the unmerge to just the first definition.
2388 if (Dst0Ty.isVector())
2389 return false;
2390 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2391 LLT SrcTy = MRI.getType(SrcReg);
2392 if (SrcTy.isVector())
2393 return false;
2394
2395 Register ZExtSrcReg;
2396 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2397 return false;
2398
2399 // Finally we can replace the first definition with
2400 // a zext of the source if the definition is big enough to hold
2401 // all of ZExtSrc bits.
2402 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2403 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2404}
2405
2407 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2408 "Expected an unmerge");
2409
2410 Register Dst0Reg = MI.getOperand(0).getReg();
2411
2412 MachineInstr *ZExtInstr =
2413 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2414 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2415 "Expecting a G_ZEXT");
2416
2417 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2418 LLT Dst0Ty = MRI.getType(Dst0Reg);
2419 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2420
2421 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2422 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2423 } else {
2424 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2425 "ZExt src doesn't fit in destination");
2426 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2427 }
2428
2429 Register ZeroReg;
2430 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2431 if (!ZeroReg)
2432 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2433 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2434 }
2435 MI.eraseFromParent();
2436}
2437
2439 unsigned TargetShiftSize,
2440 unsigned &ShiftVal) const {
2441 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2442 MI.getOpcode() == TargetOpcode::G_LSHR ||
2443 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2444
2445 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2446 if (Ty.isVector()) // TODO: Handle vector types.
2447 return false;
2448
2449 // Don't narrow further than the requested size.
2450 unsigned Size = Ty.getSizeInBits();
2451 if (Size <= TargetShiftSize)
2452 return false;
2453
2454 auto MaybeImmVal =
2455 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2456 if (!MaybeImmVal)
2457 return false;
2458
2459 ShiftVal = MaybeImmVal->Value.getSExtValue();
2460 return ShiftVal >= Size / 2 && ShiftVal < Size;
2461}
2462
2464 MachineInstr &MI, const unsigned &ShiftVal) const {
2465 Register DstReg = MI.getOperand(0).getReg();
2466 Register SrcReg = MI.getOperand(1).getReg();
2467 LLT Ty = MRI.getType(SrcReg);
2468 unsigned Size = Ty.getSizeInBits();
2469 unsigned HalfSize = Size / 2;
2470 assert(ShiftVal >= HalfSize);
2471
2472 LLT HalfTy = LLT::scalar(HalfSize);
2473
2474 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2475 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2476
2477 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2478 Register Narrowed = Unmerge.getReg(1);
2479
2480 // dst = G_LSHR s64:x, C for C >= 32
2481 // =>
2482 // lo, hi = G_UNMERGE_VALUES x
2483 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2484
2485 if (NarrowShiftAmt != 0) {
2486 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2487 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2488 }
2489
2490 auto Zero = Builder.buildConstant(HalfTy, 0);
2491 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2492 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2493 Register Narrowed = Unmerge.getReg(0);
2494 // dst = G_SHL s64:x, C for C >= 32
2495 // =>
2496 // lo, hi = G_UNMERGE_VALUES x
2497 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2498 if (NarrowShiftAmt != 0) {
2499 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2500 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2501 }
2502
2503 auto Zero = Builder.buildConstant(HalfTy, 0);
2504 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2505 } else {
2506 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2507 auto Hi = Builder.buildAShr(
2508 HalfTy, Unmerge.getReg(1),
2509 Builder.buildConstant(HalfTy, HalfSize - 1));
2510
2511 if (ShiftVal == HalfSize) {
2512 // (G_ASHR i64:x, 32) ->
2513 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2514 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2515 } else if (ShiftVal == Size - 1) {
2516 // Don't need a second shift.
2517 // (G_ASHR i64:x, 63) ->
2518 // %narrowed = (G_ASHR hi_32(x), 31)
2519 // G_MERGE_VALUES %narrowed, %narrowed
2520 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2521 } else {
2522 auto Lo = Builder.buildAShr(
2523 HalfTy, Unmerge.getReg(1),
2524 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2525
2526 // (G_ASHR i64:x, C) ->, for C >= 32
2527 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2528 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2529 }
2530 }
2531
2532 MI.eraseFromParent();
2533}
2534
2536 MachineInstr &MI, unsigned TargetShiftAmount) const {
2537 unsigned ShiftAmt;
2538 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2539 applyCombineShiftToUnmerge(MI, ShiftAmt);
2540 return true;
2541 }
2542
2543 return false;
2544}
2545
2547 Register &Reg) const {
2548 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2549 Register DstReg = MI.getOperand(0).getReg();
2550 LLT DstTy = MRI.getType(DstReg);
2551 Register SrcReg = MI.getOperand(1).getReg();
2552 return mi_match(SrcReg, MRI,
2553 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2554}
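// For illustration: %i:_(s64) = G_PTRTOINT %p:_(p0) followed by
// %q:_(p0) = G_INTTOPTR %i round-trips the pointer at full width, so %q can
// simply be replaced with a copy of %p (see the apply below).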
2555
2557 Register &Reg) const {
2558 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2559 Register DstReg = MI.getOperand(0).getReg();
2560 Builder.buildCopy(DstReg, Reg);
2561 MI.eraseFromParent();
2562}
2563
2565 Register &Reg) const {
2566 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2567 Register DstReg = MI.getOperand(0).getReg();
2568 Builder.buildZExtOrTrunc(DstReg, Reg);
2569 MI.eraseFromParent();
2570}
2571
2573 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2574 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2575 Register LHS = MI.getOperand(1).getReg();
2576 Register RHS = MI.getOperand(2).getReg();
2577 LLT IntTy = MRI.getType(LHS);
2578
2579 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2580 // instruction.
2581 PtrReg.second = false;
2582 for (Register SrcReg : {LHS, RHS}) {
2583 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2584 // Don't handle cases where the integer is implicitly converted to the
2585 // pointer width.
2586 LLT PtrTy = MRI.getType(PtrReg.first);
2587 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2588 return true;
2589 }
2590
2591 PtrReg.second = true;
2592 }
2593
2594 return false;
2595}
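// For illustration (made-up names, assuming 64-bit pointers): with
// %i:_(s64) = G_PTRTOINT %p:_(p0),
//   %sum:_(s64) = G_ADD %i, %x
// is rewritten by the apply below as
//   %np:_(p0) = G_PTR_ADD %p, %x
//   %sum:_(s64) = G_PTRTOINT %np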
2596
2598 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2599 Register Dst = MI.getOperand(0).getReg();
2600 Register LHS = MI.getOperand(1).getReg();
2601 Register RHS = MI.getOperand(2).getReg();
2602
2603 const bool DoCommute = PtrReg.second;
2604 if (DoCommute)
2605 std::swap(LHS, RHS);
2606 LHS = PtrReg.first;
2607
2608 LLT PtrTy = MRI.getType(LHS);
2609
2610 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2611 Builder.buildPtrToInt(Dst, PtrAdd);
2612 MI.eraseFromParent();
2613}
2614
2616 APInt &NewCst) const {
2617 auto &PtrAdd = cast<GPtrAdd>(MI);
2618 Register LHS = PtrAdd.getBaseReg();
2619 Register RHS = PtrAdd.getOffsetReg();
2620 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2621
2622 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2623 APInt Cst;
2624 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2625 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2626 // G_INTTOPTR uses zero-extension
2627 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2628 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2629 return true;
2630 }
2631 }
2632
2633 return false;
2634}
2635
2637 APInt &NewCst) const {
2638 auto &PtrAdd = cast<GPtrAdd>(MI);
2639 Register Dst = PtrAdd.getReg(0);
2640
2641 Builder.buildConstant(Dst, NewCst);
2642 PtrAdd.eraseFromParent();
2643}
2644
2646 Register &Reg) const {
2647 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2648 Register DstReg = MI.getOperand(0).getReg();
2649 Register SrcReg = MI.getOperand(1).getReg();
2650 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2651 if (OriginalSrcReg.isValid())
2652 SrcReg = OriginalSrcReg;
2653 LLT DstTy = MRI.getType(DstReg);
2654 return mi_match(SrcReg, MRI,
2655 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2656 canReplaceReg(DstReg, Reg, MRI);
2657}
2658
2660 Register &Reg) const {
2661 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2662 Register DstReg = MI.getOperand(0).getReg();
2663 Register SrcReg = MI.getOperand(1).getReg();
2664 LLT DstTy = MRI.getType(DstReg);
2665 if (mi_match(SrcReg, MRI,
2666 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2667 canReplaceReg(DstReg, Reg, MRI)) {
2668 unsigned DstSize = DstTy.getScalarSizeInBits();
2669 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2670 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2671 }
2672 return false;
2673}
2674
2675 static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2676 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2677 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2678
2679 // ShiftTy > 32 > TruncTy -> 32
2680 if (ShiftSize > 32 && TruncSize < 32)
2681 return ShiftTy.changeElementSize(32);
2682
2683 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2684 // Some targets like it, some don't, some only like it under certain
2685 // conditions/processor versions, etc.
2686 // A TL hook might be needed for this.
2687
2688 // Don't combine
2689 return ShiftTy;
2690}
2691
2693 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2694 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2695 Register DstReg = MI.getOperand(0).getReg();
2696 Register SrcReg = MI.getOperand(1).getReg();
2697
2698 if (!MRI.hasOneNonDBGUse(SrcReg))
2699 return false;
2700
2701 LLT SrcTy = MRI.getType(SrcReg);
2702 LLT DstTy = MRI.getType(DstReg);
2703
2704 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2705 const auto &TL = getTargetLowering();
2706
2707 LLT NewShiftTy;
2708 switch (SrcMI->getOpcode()) {
2709 default:
2710 return false;
2711 case TargetOpcode::G_SHL: {
2712 NewShiftTy = DstTy;
2713
2714 // Make sure new shift amount is legal.
2715 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2716 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2717 return false;
2718 break;
2719 }
2720 case TargetOpcode::G_LSHR:
2721 case TargetOpcode::G_ASHR: {
2722 // For right shifts, we conservatively do not do the transform if the TRUNC
2723 // has any STORE users. The reason is that if we change the type of the
2724 // shift, we may break the truncstore combine.
2725 //
2726 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2727 for (auto &User : MRI.use_instructions(DstReg))
2728 if (User.getOpcode() == TargetOpcode::G_STORE)
2729 return false;
2730
2731 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2732 if (NewShiftTy == SrcTy)
2733 return false;
2734
2735 // Make sure we won't lose information by truncating the high bits.
2736 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2737 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2738 DstTy.getScalarSizeInBits()))
2739 return false;
2740 break;
2741 }
2742 }
2743
2744 if (!isLegalOrBeforeLegalizer(
2745 {SrcMI->getOpcode(),
2746 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2747 return false;
2748
2749 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2750 return true;
2751}
2752
2754 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2755 MachineInstr *ShiftMI = MatchInfo.first;
2756 LLT NewShiftTy = MatchInfo.second;
2757
2758 Register Dst = MI.getOperand(0).getReg();
2759 LLT DstTy = MRI.getType(Dst);
2760
2761 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2762 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2763 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2764
2765 Register NewShift =
2766 Builder
2767 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2768 .getReg(0);
2769
2770 if (NewShiftTy == DstTy)
2771 replaceRegWith(MRI, Dst, NewShift);
2772 else
2773 Builder.buildTrunc(Dst, NewShift);
2774
2775 eraseInst(MI);
2776}
2777
2779 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2780 return MO.isReg() &&
2781 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2782 });
2783}
2784
2786 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2787 return !MO.isReg() ||
2788 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2789 });
2790}
2791
2793 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2794 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2795 return all_of(Mask, [](int Elt) { return Elt < 0; });
2796}
2797
2799 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2800 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2801 MRI);
2802}
2803
2805 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2806 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2807 MRI);
2808}
2809
2811 MachineInstr &MI) const {
2812 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2813 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2814 "Expected an insert/extract element op");
2815 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2816 if (VecTy.isScalableVector())
2817 return false;
2818
2819 unsigned IdxIdx =
2820 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2821 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2822 if (!Idx)
2823 return false;
2824 return Idx->getZExtValue() >= VecTy.getNumElements();
2825}
2826
2828 unsigned &OpIdx) const {
2829 GSelect &SelMI = cast<GSelect>(MI);
2830 auto Cst =
2831 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2832 if (!Cst)
2833 return false;
2834 OpIdx = Cst->isZero() ? 3 : 2;
2835 return true;
2836}
2837
2838void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2839
2841 const MachineOperand &MOP2) const {
2842 if (!MOP1.isReg() || !MOP2.isReg())
2843 return false;
2844 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2845 if (!InstAndDef1)
2846 return false;
2847 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2848 if (!InstAndDef2)
2849 return false;
2850 MachineInstr *I1 = InstAndDef1->MI;
2851 MachineInstr *I2 = InstAndDef2->MI;
2852
2853 // Handle a case like this:
2854 //
2855 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2856 //
2857 // Even though %0 and %1 are produced by the same instruction they are not
2858 // the same values.
2859 if (I1 == I2)
2860 return MOP1.getReg() == MOP2.getReg();
2861
2862 // If we have an instruction which loads or stores, we can't guarantee that
2863 // it is identical.
2864 //
2865 // For example, we may have
2866 //
2867 // %x1 = G_LOAD %addr (load N from @somewhere)
2868 // ...
2869 // call @foo
2870 // ...
2871 // %x2 = G_LOAD %addr (load N from @somewhere)
2872 // ...
2873 // %or = G_OR %x1, %x2
2874 //
2875 // It's possible that @foo will modify whatever lives at the address we're
2876 // loading from. To be safe, let's just assume that all loads and stores
2877 // are different (unless we have something which is guaranteed to not
2878 // change.)
2879 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2880 return false;
2881
2882 // If both instructions are loads or stores, they are equal only if both
2883 // are dereferenceable invariant loads with the same number of bits.
2884 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2885 auto *LS1 = dyn_cast<GLoadStore>(I1);
2886 auto *LS2 = dyn_cast<GLoadStore>(I2);
2887 if (!LS1 || !LS2)
2888 return false;
2889
2890 if (!I2->isDereferenceableInvariantLoad() ||
2891 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2892 return false;
2893 }
2894
2895 // Check for physical registers on the instructions first to avoid cases
2896 // like this:
2897 //
2898 // %a = COPY $physreg
2899 // ...
2900 // SOMETHING implicit-def $physreg
2901 // ...
2902 // %b = COPY $physreg
2903 //
2904 // These copies are not equivalent.
2905 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2906 return MO.isReg() && MO.getReg().isPhysical();
2907 })) {
2908 // Check if we have a case like this:
2909 //
2910 // %a = COPY $physreg
2911 // %b = COPY %a
2912 //
2913 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2914 // From that, we know that they must have the same value, since they must
2915 // have come from the same COPY.
2916 return I1->isIdenticalTo(*I2);
2917 }
2918
2919 // We don't have any physical registers, so we don't necessarily need the
2920 // same vreg defs.
2921 //
2922 // On the off-chance that there's some target instruction feeding into the
2923 // instruction, let's use produceSameValue instead of isIdenticalTo.
2924 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2925 // Handle instructions with multiple defs that produce the same values. The
2926 // values are the same for operands with the same index.
2927 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2928 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2929 // I1 and I2 are different instructions but produce the same values;
2930 // %1 and %6 are the same value, while %1 and %7 are not.
2931 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2932 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2933 }
2934 return false;
2935}
2936
2938 int64_t C) const {
2939 if (!MOP.isReg())
2940 return false;
2941 auto *MI = MRI.getVRegDef(MOP.getReg());
2942 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2943 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2944 MaybeCst->getSExtValue() == C;
2945}
2946
2948 double C) const {
2949 if (!MOP.isReg())
2950 return false;
2951 std::optional<FPValueAndVReg> MaybeCst;
2952 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2953 return false;
2954
2955 return MaybeCst->Value.isExactlyValue(C);
2956}
2957
2959 unsigned OpIdx) const {
2960 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2961 Register OldReg = MI.getOperand(0).getReg();
2962 Register Replacement = MI.getOperand(OpIdx).getReg();
2963 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2964 replaceRegWith(MRI, OldReg, Replacement);
2965 MI.eraseFromParent();
2966}
2967
2969 Register Replacement) const {
2970 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2971 Register OldReg = MI.getOperand(0).getReg();
2972 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2973 replaceRegWith(MRI, OldReg, Replacement);
2974 MI.eraseFromParent();
2975}
2976
2978 unsigned ConstIdx) const {
2979 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2980 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2981
2982 // Get the shift amount
2983 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2984 if (!VRegAndVal)
2985 return false;
2986
2987 // Return true if the shift amount >= bitwidth.
2988 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2989}
2990
2992 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2993 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2994 "This is not a funnel shift operation");
2995
2996 Register ConstReg = MI.getOperand(3).getReg();
2997 LLT ConstTy = MRI.getType(ConstReg);
2998 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2999
3000 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3001 assert((VRegAndVal) && "Value is not a constant");
3002
3003 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
3004 APInt NewConst = VRegAndVal->Value.urem(
3005 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3006
3007 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3008 Builder.buildInstr(
3009 MI.getOpcode(), {MI.getOperand(0)},
3010 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3011
3012 MI.eraseFromParent();
3013}
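// For illustration (s32, constant arbitrary): a G_FSHL whose shift amount is
// 37 is rebuilt above with 37 % 32 == 5, since funnel-shift amounts are taken
// modulo the bit width of the shifted value.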
3014
3016 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3017 // Match (cond ? x : x)
3018 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3019 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3020 MRI);
3021}
3022
3024 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3025 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3026 MRI);
3027}
3028
3030 unsigned OpIdx) const {
3031 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
3032 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
3033 MRI);
3034}
3035
3037 unsigned OpIdx) const {
3038 MachineOperand &MO = MI.getOperand(OpIdx);
3039 return MO.isReg() &&
3040 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3041}
3042
3044 unsigned OpIdx) const {
3045 MachineOperand &MO = MI.getOperand(OpIdx);
3046 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT);
3047}
3048
3050 double C) const {
3051 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3052 Builder.buildFConstant(MI.getOperand(0), C);
3053 MI.eraseFromParent();
3054}
3055
3057 int64_t C) const {
3058 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3059 Builder.buildConstant(MI.getOperand(0), C);
3060 MI.eraseFromParent();
3061}
3062
3064 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3065 Builder.buildConstant(MI.getOperand(0), C);
3066 MI.eraseFromParent();
3067}
3068
3070 ConstantFP *CFP) const {
3071 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3072 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3073 MI.eraseFromParent();
3074}
3075
3077 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3078 Builder.buildUndef(MI.getOperand(0));
3079 MI.eraseFromParent();
3080}
3081
3083 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3084 Register LHS = MI.getOperand(1).getReg();
3085 Register RHS = MI.getOperand(2).getReg();
3086 Register &NewLHS = std::get<0>(MatchInfo);
3087 Register &NewRHS = std::get<1>(MatchInfo);
3088
3089 // Helper lambda to check for opportunities for
3090 // ((0-A) + B) -> B - A
3091 // (A + (0-B)) -> A - B
3092 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3093 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3094 return false;
3095 NewLHS = MaybeNewLHS;
3096 return true;
3097 };
3098
3099 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3100}
3101
3103 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3104 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3105 "Invalid opcode");
3106 Register DstReg = MI.getOperand(0).getReg();
3107 LLT DstTy = MRI.getType(DstReg);
3108 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3109
3110 if (DstTy.isScalableVector())
3111 return false;
3112
3113 unsigned NumElts = DstTy.getNumElements();
3114 // If this MI is part of a sequence of insert_vec_elts, then
3115 // don't do the combine in the middle of the sequence.
3116 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3117 TargetOpcode::G_INSERT_VECTOR_ELT)
3118 return false;
3119 MachineInstr *CurrInst = &MI;
3120 MachineInstr *TmpInst;
3121 int64_t IntImm;
3122 Register TmpReg;
3123 MatchInfo.resize(NumElts);
3124 while (mi_match(
3125 CurrInst->getOperand(0).getReg(), MRI,
3126 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3127 if (IntImm >= NumElts || IntImm < 0)
3128 return false;
3129 if (!MatchInfo[IntImm])
3130 MatchInfo[IntImm] = TmpReg;
3131 CurrInst = TmpInst;
3132 }
3133 // Variable index.
3134 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3135 return false;
3136 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3137 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3138 if (!MatchInfo[I - 1].isValid())
3139 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3140 }
3141 return true;
3142 }
3143 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3144 // overwritten, bail out.
3145 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3146 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3147}
3148
3150 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3151 Register UndefReg;
3152 auto GetUndef = [&]() {
3153 if (UndefReg)
3154 return UndefReg;
3155 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3156 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3157 return UndefReg;
3158 };
3159 for (Register &Reg : MatchInfo) {
3160 if (!Reg)
3161 Reg = GetUndef();
3162 }
3163 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3164 MI.eraseFromParent();
3165}
3166
3168 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3169 Register SubLHS, SubRHS;
3170 std::tie(SubLHS, SubRHS) = MatchInfo;
3171 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3172 MI.eraseFromParent();
3173}
3174
3176 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3177 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3178 //
3179 // Creates the new hand + logic instruction (but does not insert them.)
3180 //
3181 // On success, MatchInfo is populated with the new instructions. These are
3182 // inserted in applyHoistLogicOpWithSameOpcodeHands.
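// For illustration (made-up types): with %a:_(s32) = G_ZEXT %x:_(s8) and
// %b:_(s32) = G_ZEXT %y:_(s8),
//   %res:_(s32) = G_XOR %a, %b
// can be rebuilt as
//   %t:_(s8) = G_XOR %x, %y
//   %res:_(s32) = G_ZEXT %t
// hoisting the logic op above the matching "hands".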
3183 unsigned LogicOpcode = MI.getOpcode();
3184 assert(LogicOpcode == TargetOpcode::G_AND ||
3185 LogicOpcode == TargetOpcode::G_OR ||
3186 LogicOpcode == TargetOpcode::G_XOR);
3187 MachineIRBuilder MIB(MI);
3188 Register Dst = MI.getOperand(0).getReg();
3189 Register LHSReg = MI.getOperand(1).getReg();
3190 Register RHSReg = MI.getOperand(2).getReg();
3191
3192 // Don't recompute anything.
3193 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3194 return false;
3195
3196 // Make sure we have (hand x, ...), (hand y, ...)
3197 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3198 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3199 if (!LeftHandInst || !RightHandInst)
3200 return false;
3201 unsigned HandOpcode = LeftHandInst->getOpcode();
3202 if (HandOpcode != RightHandInst->getOpcode())
3203 return false;
3204 if (LeftHandInst->getNumOperands() < 2 ||
3205 !LeftHandInst->getOperand(1).isReg() ||
3206 RightHandInst->getNumOperands() < 2 ||
3207 !RightHandInst->getOperand(1).isReg())
3208 return false;
3209
3210 // Make sure the types match up, and if we're doing this post-legalization,
3211 // we end up with legal types.
3212 Register X = LeftHandInst->getOperand(1).getReg();
3213 Register Y = RightHandInst->getOperand(1).getReg();
3214 LLT XTy = MRI.getType(X);
3215 LLT YTy = MRI.getType(Y);
3216 if (!XTy.isValid() || XTy != YTy)
3217 return false;
3218
3219 // Optional extra source register.
3220 Register ExtraHandOpSrcReg;
3221 switch (HandOpcode) {
3222 default:
3223 return false;
3224 case TargetOpcode::G_ANYEXT:
3225 case TargetOpcode::G_SEXT:
3226 case TargetOpcode::G_ZEXT: {
3227 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3228 break;
3229 }
3230 case TargetOpcode::G_TRUNC: {
3231 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3232 const MachineFunction *MF = MI.getMF();
3233 LLVMContext &Ctx = MF->getFunction().getContext();
3234
3235 LLT DstTy = MRI.getType(Dst);
3236 const TargetLowering &TLI = getTargetLowering();
3237
3238 // Be extra careful sinking truncate. If it's free, there's no benefit in
3239 // widening a binop.
3240 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3241 return false;
3242 break;
3243 }
3244 case TargetOpcode::G_AND:
3245 case TargetOpcode::G_ASHR:
3246 case TargetOpcode::G_LSHR:
3247 case TargetOpcode::G_SHL: {
3248 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3249 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3250 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3251 return false;
3252 ExtraHandOpSrcReg = ZOp.getReg();
3253 break;
3254 }
3255 }
3256
3257 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3258 return false;
3259
3260 // Record the steps to build the new instructions.
3261 //
3262 // Steps to build (logic x, y)
3263 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3264 OperandBuildSteps LogicBuildSteps = {
3265 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3266 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3267 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3268 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3269
3270 // Steps to build hand (logic x, y), ...z
3271 OperandBuildSteps HandBuildSteps = {
3272 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3273 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3274 if (ExtraHandOpSrcReg.isValid())
3275 HandBuildSteps.push_back(
3276 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3277 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3278
3279 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3280 return true;
3281}
3282
3284 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3285 assert(MatchInfo.InstrsToBuild.size() &&
3286 "Expected at least one instr to build?");
3287 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3288 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3289 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3290 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3291 for (auto &OperandFn : InstrToBuild.OperandFns)
3292 OperandFn(Instr);
3293 }
3294 MI.eraseFromParent();
3295}
3296
3298 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3299 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3300 int64_t ShlCst, AshrCst;
3301 Register Src;
3302 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3303 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3304 m_ICstOrSplat(AshrCst))))
3305 return false;
3306 if (ShlCst != AshrCst)
3307 return false;
3308 if (!isLegalOrBeforeLegalizer(
3309 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3310 return false;
3311 MatchInfo = std::make_tuple(Src, ShlCst);
3312 return true;
3313}
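// For illustration (s32, constant arbitrary): G_ASHR (G_SHL %x, 24), 24
// sign-extends the low 8 bits of %x, so it is rewritten below as
// G_SEXT_INREG %x, 8 (Size - ShiftAmt == 32 - 24 == 8).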
3314
3316 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3317 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3318 Register Src;
3319 int64_t ShiftAmt;
3320 std::tie(Src, ShiftAmt) = MatchInfo;
3321 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3322 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3323 MI.eraseFromParent();
3324}
3325
3326/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3329 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3330 assert(MI.getOpcode() == TargetOpcode::G_AND);
3331
3332 Register Dst = MI.getOperand(0).getReg();
3333 LLT Ty = MRI.getType(Dst);
3334
3335 Register R;
3336 int64_t C1;
3337 int64_t C2;
3338 if (!mi_match(
3339 Dst, MRI,
3340 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3341 return false;
3342
3343 MatchInfo = [=](MachineIRBuilder &B) {
3344 if (C1 & C2) {
3345 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3346 return;
3347 }
3348 auto Zero = B.buildConstant(Ty, 0);
3349 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3350 };
3351 return true;
3352}
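// For illustration (s32, constants arbitrary):
//   G_AND (G_AND %x, 0x0F0F), 0x00FF folds to G_AND %x, 0x000F, while
//   G_AND (G_AND %x, 0xFF00), 0x00FF folds to the constant 0 because the
//   masks do not overlap.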
3353
3355 Register &Replacement) const {
3356 // Given
3357 //
3358 // %y:_(sN) = G_SOMETHING
3359 // %x:_(sN) = G_SOMETHING
3360 // %res:_(sN) = G_AND %x, %y
3361 //
3362 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3363 //
3364 // Patterns like this can appear as a result of legalization. E.g.
3365 //
3366 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3367 // %one:_(s32) = G_CONSTANT i32 1
3368 // %and:_(s32) = G_AND %cmp, %one
3369 //
3370 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3371 assert(MI.getOpcode() == TargetOpcode::G_AND);
3372 if (!VT)
3373 return false;
3374
3375 Register AndDst = MI.getOperand(0).getReg();
3376 Register LHS = MI.getOperand(1).getReg();
3377 Register RHS = MI.getOperand(2).getReg();
3378
3379 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3380 // we can't do anything. If we do, then it depends on whether we have
3381 // KnownBits on the LHS.
3382 KnownBits RHSBits = VT->getKnownBits(RHS);
3383 if (RHSBits.isUnknown())
3384 return false;
3385
3386 KnownBits LHSBits = VT->getKnownBits(LHS);
3387
3388 // Check that x & Mask == x.
3389 // x & 1 == x, always
3390 // x & 0 == x, only if x is also 0
3391 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3392 //
3393 // Check if we can replace AndDst with the LHS of the G_AND
3394 if (canReplaceReg(AndDst, LHS, MRI) &&
3395 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3396 Replacement = LHS;
3397 return true;
3398 }
3399
3400 // Check if we can replace AndDst with the RHS of the G_AND
3401 if (canReplaceReg(AndDst, RHS, MRI) &&
3402 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3403 Replacement = RHS;
3404 return true;
3405 }
3406
3407 return false;
3408}
3409
3411 Register &Replacement) const {
3412 // Given
3413 //
3414 // %y:_(sN) = G_SOMETHING
3415 // %x:_(sN) = G_SOMETHING
3416 // %res:_(sN) = G_OR %x, %y
3417 //
3418 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3419 assert(MI.getOpcode() == TargetOpcode::G_OR);
3420 if (!VT)
3421 return false;
3422
3423 Register OrDst = MI.getOperand(0).getReg();
3424 Register LHS = MI.getOperand(1).getReg();
3425 Register RHS = MI.getOperand(2).getReg();
3426
3427 KnownBits LHSBits = VT->getKnownBits(LHS);
3428 KnownBits RHSBits = VT->getKnownBits(RHS);
3429
3430 // Check that x | Mask == x.
3431 // x | 0 == x, always
3432 // x | 1 == x, only if x is also 1
3433 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3434 //
3435 // Check if we can replace OrDst with the LHS of the G_OR
3436 if (canReplaceReg(OrDst, LHS, MRI) &&
3437 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3438 Replacement = LHS;
3439 return true;
3440 }
3441
3442 // Check if we can replace OrDst with the RHS of the G_OR
3443 if (canReplaceReg(OrDst, RHS, MRI) &&
3444 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3445 Replacement = RHS;
3446 return true;
3447 }
3448
3449 return false;
3450}
3451
3453 // If the input is already sign extended, just drop the extension.
3454 Register Src = MI.getOperand(1).getReg();
3455 unsigned ExtBits = MI.getOperand(2).getImm();
3456 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3457 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3458}
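// Illustrative example, assuming a hypothetical input (not from the source):
// if %src was produced by a G_SEXT from s8 to s32, it has at least 25 sign
// bits, so G_SEXT_INREG %src, 8 requires TypeSize - ExtBits + 1 = 32 - 8 + 1
// = 25 sign bits and is therefore redundant; the extension can be dropped.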
3459
3460static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3461 int64_t Cst, bool IsVector, bool IsFP) {
3462 // For i1, Cst will always be -1 regardless of boolean contents.
3463 return (ScalarSizeBits == 1 && Cst == -1) ||
3464 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3465}
3466
3467// This pattern aims to match the following shape to avoid extra mov
3468// instructions
3469// G_BUILD_VECTOR(
3470// G_UNMERGE_VALUES(src, 0)
3471// G_UNMERGE_VALUES(src, 1)
3472// G_IMPLICIT_DEF
3473// G_IMPLICIT_DEF
3474// )
3475// ->
3476// G_CONCAT_VECTORS(
3477// src,
3478// undef
3479// )
3482 Register &UnmergeSrc) const {
3483 auto &BV = cast<GBuildVector>(MI);
3484
3485 unsigned BuildUseCount = BV.getNumSources();
3486 if (BuildUseCount % 2 != 0)
3487 return false;
3488
3489 unsigned NumUnmerge = BuildUseCount / 2;
3490
3491 auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
3492
3493 // Check the first operand is an unmerge and has the correct number of
3494 // operands
3495 if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
3496 return false;
3497
3498 UnmergeSrc = Unmerge->getSourceReg();
3499
3500 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3501 LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
3502
3503 if (!UnmergeSrcTy.isVector())
3504 return false;
3505
3506 // Ensure we only generate legal instructions post-legalizer
3507 if (!IsPreLegalize &&
3508 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
3509 return false;
3510
3511 // Check that all of the operands before the midpoint come from the same
3512 // unmerge and are in the same order as they are used in the build_vector
3513 for (unsigned I = 0; I < NumUnmerge; ++I) {
3514 auto MaybeUnmergeReg = BV.getSourceReg(I);
3515 auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
3516
3517 if (!LoopUnmerge || LoopUnmerge != Unmerge)
3518 return false;
3519
3520 if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
3521 return false;
3522 }
3523
3524 // Check that all of the unmerged values are used
3525 if (Unmerge->getNumDefs() != NumUnmerge)
3526 return false;
3527
3528  // Check that all of the operands after the midpoint are undefs.
3529 for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
3530 auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
3531
3532 if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
3533 return false;
3534 }
3535
3536 return true;
3537}
3538
3542 Register &UnmergeSrc) const {
3543 assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
3544 B.setInstrAndDebugLoc(MI);
3545
3546 Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
3547 B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
3548
3549 MI.eraseFromParent();
3550}
3551
3552// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3553// using vector truncates instead
3554//
3555// EXAMPLE:
3556// %a(s32), %b(s32) = G_UNMERGE_VALUES %src(<2 x s32>)
3557// %T_a(s16) = G_TRUNC %a(s32)
3558// %T_b(s16) = G_TRUNC %b(s32)
3559// %Undef(s16) = G_IMPLICIT_DEF
3560// %dst(<4 x s16>) = G_BUILD_VECTOR %T_a(s16), %T_b(s16), %Undef(s16), %Undef(s16)
3561//
3562// ===>
3563// %Undef(<2 x s32>) = G_IMPLICIT_DEF
3564// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x s32>), %Undef(<2 x s32>)
3565// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3566//
3567// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3569 Register &MatchInfo) const {
3570 auto BuildMI = cast<GBuildVector>(&MI);
3571 unsigned NumOperands = BuildMI->getNumSources();
3572 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3573
3574 // Check the G_BUILD_VECTOR sources
3575 unsigned I;
3576 MachineInstr *UnmergeMI = nullptr;
3577
3578 // Check all source TRUNCs come from the same UNMERGE instruction
3579 for (I = 0; I < NumOperands; ++I) {
3580 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3581 auto SrcMIOpc = SrcMI->getOpcode();
3582
3583 // Check if the G_TRUNC instructions all come from the same MI
3584 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3585 if (!UnmergeMI) {
3586 UnmergeMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3587 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3588 return false;
3589 } else {
3590 auto UnmergeSrcMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3591 if (UnmergeMI != UnmergeSrcMI)
3592 return false;
3593 }
3594 } else {
3595 break;
3596 }
3597 }
3598 if (I < 2)
3599 return false;
3600
3601 // Check the remaining source elements are only G_IMPLICIT_DEF
3602 for (; I < NumOperands; ++I) {
3603 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3604 auto SrcMIOpc = SrcMI->getOpcode();
3605
3606 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3607 return false;
3608 }
3609
3610 // Check the size of unmerge source
3611 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3612 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3613 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3614 return false;
3615
3616 // Check the unmerge source and destination element types match
3617 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3618 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3619 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3620 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3621 return false;
3622
3623 // Only generate legal instructions post-legalizer
3624 if (!IsPreLegalize) {
3625 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3626
3627 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3628 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3629 return false;
3630
3631 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3632 return false;
3633 }
3634
3635 return true;
3636}
3637
3639 Register &MatchInfo) const {
3640 Register MidReg;
3641 auto BuildMI = cast<GBuildVector>(&MI);
3642 Register DstReg = BuildMI->getReg(0);
3643 LLT DstTy = MRI.getType(DstReg);
3644 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3645 unsigned DstTyNumElt = DstTy.getNumElements();
3646 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3647
3648 // No need to pad vector if only G_TRUNC is needed
3649 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3650 MidReg = MatchInfo;
3651 } else {
3652 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3653 SmallVector<Register> ConcatRegs = {MatchInfo};
3654 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3655 ConcatRegs.push_back(UndefReg);
3656
3657 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3658 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3659 }
3660
3661 Builder.buildTrunc(DstReg, MidReg);
3662 MI.eraseFromParent();
3663}
3664
3666 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3667 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3668 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3669 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3670 Register XorSrc;
3671 Register CstReg;
3672 // We match xor(src, true) here.
3673 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3674 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3675 return false;
3676
3677 if (!MRI.hasOneNonDBGUse(XorSrc))
3678 return false;
3679
3680 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3681 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3682 // list of tree nodes to visit.
3683 RegsToNegate.push_back(XorSrc);
3684 // Remember whether the comparisons are all integer or all floating point.
3685 bool IsInt = false;
3686 bool IsFP = false;
3687 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3688 Register Reg = RegsToNegate[I];
3689 if (!MRI.hasOneNonDBGUse(Reg))
3690 return false;
3691 MachineInstr *Def = MRI.getVRegDef(Reg);
3692 switch (Def->getOpcode()) {
3693 default:
3694 // Don't match if the tree contains anything other than ANDs, ORs and
3695 // comparisons.
3696 return false;
3697 case TargetOpcode::G_ICMP:
3698 if (IsFP)
3699 return false;
3700 IsInt = true;
3701 // When we apply the combine we will invert the predicate.
3702 break;
3703 case TargetOpcode::G_FCMP:
3704 if (IsInt)
3705 return false;
3706 IsFP = true;
3707 // When we apply the combine we will invert the predicate.
3708 break;
3709 case TargetOpcode::G_AND:
3710 case TargetOpcode::G_OR:
3711 // Implement De Morgan's laws:
3712 // ~(x & y) -> ~x | ~y
3713 // ~(x | y) -> ~x & ~y
3714 // When we apply the combine we will change the opcode and recursively
3715 // negate the operands.
3716 RegsToNegate.push_back(Def->getOperand(1).getReg());
3717 RegsToNegate.push_back(Def->getOperand(2).getReg());
3718 break;
3719 }
3720 }
3721
3722 // Now we know whether the comparisons are integer or floating point, check
3723 // the constant in the xor.
3724 int64_t Cst;
3725 if (Ty.isVector()) {
3726 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3727 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3728 if (!MaybeCst)
3729 return false;
3730 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3731 return false;
3732 } else {
3733 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3734 return false;
3735 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3736 return false;
3737 }
3738
3739 return true;
3740}
3741
3743 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3744 for (Register Reg : RegsToNegate) {
3745 MachineInstr *Def = MRI.getVRegDef(Reg);
3746 Observer.changingInstr(*Def);
3747 // For each comparison, invert the opcode. For each AND and OR, change the
3748 // opcode.
3749 switch (Def->getOpcode()) {
3750 default:
3751 llvm_unreachable("Unexpected opcode");
3752 case TargetOpcode::G_ICMP:
3753 case TargetOpcode::G_FCMP: {
3754 MachineOperand &PredOp = Def->getOperand(1);
3757 PredOp.setPredicate(NewP);
3758 break;
3759 }
3760 case TargetOpcode::G_AND:
3761 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3762 break;
3763 case TargetOpcode::G_OR:
3764 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3765 break;
3766 }
3767 Observer.changedInstr(*Def);
3768 }
3769
3770 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3771 MI.eraseFromParent();
3772}
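// Illustrative sketch of the De Morgan rewrite above (not upstream text):
//   %a:_(s1) = G_ICMP intpred(eq), %x, %y
//   %b:_(s1) = G_ICMP intpred(eq), %z, %w
//   %c:_(s1) = G_AND %a, %b
//   %r:_(s1) = G_XOR %c, -1       ; xor with "true"
// becomes
//   %a:_(s1) = G_ICMP intpred(ne), %x, %y
//   %b:_(s1) = G_ICMP intpred(ne), %z, %w
//   %r:_(s1) = G_OR %a, %b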
3773
3775 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3776 // Match (xor (and x, y), y) (or any of its commuted cases)
3777 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3778 Register &X = MatchInfo.first;
3779 Register &Y = MatchInfo.second;
3780 Register AndReg = MI.getOperand(1).getReg();
3781 Register SharedReg = MI.getOperand(2).getReg();
3782
3783 // Find a G_AND on either side of the G_XOR.
3784 // Look for one of
3785 //
3786 // (xor (and x, y), SharedReg)
3787 // (xor SharedReg, (and x, y))
3788 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3789 std::swap(AndReg, SharedReg);
3790 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3791 return false;
3792 }
3793
3794 // Only do this if we'll eliminate the G_AND.
3795 if (!MRI.hasOneNonDBGUse(AndReg))
3796 return false;
3797
3798 // We can combine if SharedReg is the same as either the LHS or RHS of the
3799 // G_AND.
3800 if (Y != SharedReg)
3801 std::swap(X, Y);
3802 return Y == SharedReg;
3803}
3804
3806 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3807 // Fold (xor (and x, y), y) -> (and (not x), y)
3808 Register X, Y;
3809 std::tie(X, Y) = MatchInfo;
3810 auto Not = Builder.buildNot(MRI.getType(X), X);
3811 Observer.changingInstr(MI);
3812 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3813 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3814 MI.getOperand(2).setReg(Y);
3815 Observer.changedInstr(MI);
3816}
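// Quick sanity check of the identity used above (illustrative): with
// x = 0b1100 and y = 0b1010, (x & y) ^ y = 0b1000 ^ 0b1010 = 0b0010, and
// (~x) & y = 0b0011 & 0b1010 = 0b0010 (keeping only the low four bits),
// so both forms agree.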
3817
3819 auto &PtrAdd = cast<GPtrAdd>(MI);
3820 Register DstReg = PtrAdd.getReg(0);
3821 LLT Ty = MRI.getType(DstReg);
3822 const DataLayout &DL = Builder.getMF().getDataLayout();
3823
3824 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3825 return false;
3826
3827 if (Ty.isPointer()) {
3828 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3829 return ConstVal && *ConstVal == 0;
3830 }
3831
3832 assert(Ty.isVector() && "Expecting a vector type");
3833 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3834 return isBuildVectorAllZeros(*VecMI, MRI);
3835}
3836
3838 auto &PtrAdd = cast<GPtrAdd>(MI);
3839 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3840 PtrAdd.eraseFromParent();
3841}
3842
3843/// The second source operand is known to be a power of 2.
3845 Register DstReg = MI.getOperand(0).getReg();
3846 Register Src0 = MI.getOperand(1).getReg();
3847 Register Pow2Src1 = MI.getOperand(2).getReg();
3848 LLT Ty = MRI.getType(DstReg);
3849
3850 // Fold (urem x, pow2) -> (and x, pow2-1)
3851 auto NegOne = Builder.buildConstant(Ty, -1);
3852 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3853 Builder.buildAnd(DstReg, Src0, Add);
3854 MI.eraseFromParent();
3855}
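// Illustrative example (not from the source): G_UREM %x, 8 becomes
// G_AND %x, 7. The mask is built as %pow2 + (-1) so the rewrite also works
// when the power-of-two operand is a non-constant register that is merely
// known to be a power of two.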
3856
3858 unsigned &SelectOpNo) const {
3859 Register LHS = MI.getOperand(1).getReg();
3860 Register RHS = MI.getOperand(2).getReg();
3861
3862 Register OtherOperandReg = RHS;
3863 SelectOpNo = 1;
3864 MachineInstr *Select = MRI.getVRegDef(LHS);
3865
3866 // Don't do this unless the old select is going away. We want to eliminate the
3867 // binary operator, not replace a binop with a select.
3868 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3869 !MRI.hasOneNonDBGUse(LHS)) {
3870 OtherOperandReg = LHS;
3871 SelectOpNo = 2;
3872 Select = MRI.getVRegDef(RHS);
3873 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3874 !MRI.hasOneNonDBGUse(RHS))
3875 return false;
3876 }
3877
3878 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3879 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3880
3881 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3882 /*AllowFP*/ true,
3883 /*AllowOpaqueConstants*/ false))
3884 return false;
3885 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3886 /*AllowFP*/ true,
3887 /*AllowOpaqueConstants*/ false))
3888 return false;
3889
3890 unsigned BinOpcode = MI.getOpcode();
3891
3892 // We know that one of the operands is a select of constants. Now verify that
3893 // the other binary operator operand is either a constant, or we can handle a
3894 // variable.
3895 bool CanFoldNonConst =
3896 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3897 (isNullOrNullSplat(*SelectLHS, MRI) ||
3898 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3899 (isNullOrNullSplat(*SelectRHS, MRI) ||
3900 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3901 if (CanFoldNonConst)
3902 return true;
3903
3904 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3905 /*AllowFP*/ true,
3906 /*AllowOpaqueConstants*/ false);
3907}
3908
3909/// \p SelectOperand is the operand in binary operator \p MI that is the select
3910/// to fold.
3912 MachineInstr &MI, const unsigned &SelectOperand) const {
3913 Register Dst = MI.getOperand(0).getReg();
3914 Register LHS = MI.getOperand(1).getReg();
3915 Register RHS = MI.getOperand(2).getReg();
3916 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3917
3918 Register SelectCond = Select->getOperand(1).getReg();
3919 Register SelectTrue = Select->getOperand(2).getReg();
3920 Register SelectFalse = Select->getOperand(3).getReg();
3921
3922 LLT Ty = MRI.getType(Dst);
3923 unsigned BinOpcode = MI.getOpcode();
3924
3925 Register FoldTrue, FoldFalse;
3926
3927 // We have a select-of-constants followed by a binary operator with a
3928 // constant. Eliminate the binop by pulling the constant math into the select.
3929 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3930 if (SelectOperand == 1) {
3931 // TODO: SelectionDAG verifies this actually constant folds before
3932 // committing to the combine.
3933
3934 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3935 FoldFalse =
3936 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3937 } else {
3938 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3939 FoldFalse =
3940 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3941 }
3942
3943 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3944 MI.eraseFromParent();
3945}
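// Illustrative example, assuming constant operands (not upstream text):
//   %sel:_(s32) = G_SELECT %cond, 3, 5
//   %res:_(s32) = G_ADD %sel, 10
// is rewritten to
//   %res:_(s32) = G_SELECT %cond, %t, %f
// where %t = G_ADD 3, 10 and %f = G_ADD 5, 10 are the per-arm operations
// built above, which can subsequently constant-fold to 13 and 15.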
3946
3947std::optional<SmallVector<Register, 8>>
3948CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3949 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3950 // We want to detect if Root is part of a tree which represents a bunch
3951 // of loads being merged into a larger load. We'll try to recognize patterns
3952 // like, for example:
3953 //
3954 // Reg Reg
3955 // \ /
3956 // OR_1 Reg
3957 // \ /
3958 // OR_2
3959 // \ Reg
3960 // .. /
3961 // Root
3962 //
3963 // Reg Reg Reg Reg
3964 // \ / \ /
3965 // OR_1 OR_2
3966 // \ /
3967 // \ /
3968 // ...
3969 // Root
3970 //
3971 // Each "Reg" may have been produced by a load + some arithmetic. This
3972 // function will save each of them.
3973 SmallVector<Register, 8> RegsToVisit;
3975
3976 // In the "worst" case, we're dealing with a load for each byte. So, there
3977 // are at most #bytes - 1 ORs.
3978 const unsigned MaxIter =
3979 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3980 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3981 if (Ors.empty())
3982 break;
3983 const MachineInstr *Curr = Ors.pop_back_val();
3984 Register OrLHS = Curr->getOperand(1).getReg();
3985 Register OrRHS = Curr->getOperand(2).getReg();
3986
3987 // In the combine, we want to eliminate the entire tree.
3988 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3989 return std::nullopt;
3990
3991 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3992 // something that may be a load + arithmetic.
3993 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3994 Ors.push_back(Or);
3995 else
3996 RegsToVisit.push_back(OrLHS);
3997 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3998 Ors.push_back(Or);
3999 else
4000 RegsToVisit.push_back(OrRHS);
4001 }
4002
4003 // We're going to try and merge each register into a wider power-of-2 type,
4004 // so we ought to have an even number of registers.
4005 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
4006 return std::nullopt;
4007 return RegsToVisit;
4008}
4009
4010/// Helper function for findLoadOffsetsForLoadOrCombine.
4011///
4012/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
4013/// and then moving that value into a specific byte offset.
4014///
4015/// e.g. x[i] << 24
4016///
4017/// \returns The load instruction and the byte offset it is moved into.
4018static std::optional<std::pair<GZExtLoad *, int64_t>>
4019matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
4020 const MachineRegisterInfo &MRI) {
4021 assert(MRI.hasOneNonDBGUse(Reg) &&
4022 "Expected Reg to only have one non-debug use?");
4023 Register MaybeLoad;
4024 int64_t Shift;
4025 if (!mi_match(Reg, MRI,
4026 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
4027 Shift = 0;
4028 MaybeLoad = Reg;
4029 }
4030
4031 if (Shift % MemSizeInBits != 0)
4032 return std::nullopt;
4033
4034 // TODO: Handle other types of loads.
4035 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
4036 if (!Load)
4037 return std::nullopt;
4038
4039 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4040 return std::nullopt;
4041
4042 return std::make_pair(Load, Shift / MemSizeInBits);
4043}
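// Illustrative example (not from the source): with MemSizeInBits = 8, a
// pattern such as (G_ZEXTLOAD a[i]) << 16 matches with Shift = 16, so the
// loaded byte is reported as landing at byte position 16 / 8 = 2 of the
// wider value.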
4044
4045std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4046CombinerHelper::findLoadOffsetsForLoadOrCombine(
4048 const SmallVector<Register, 8> &RegsToVisit,
4049 const unsigned MemSizeInBits) const {
4050
4051 // Each load found for the pattern. There should be one for each RegsToVisit.
4052 SmallSetVector<const MachineInstr *, 8> Loads;
4053
4054 // The lowest index used in any load. (The lowest "i" for each x[i].)
4055 int64_t LowestIdx = INT64_MAX;
4056
4057 // The load which uses the lowest index.
4058 GZExtLoad *LowestIdxLoad = nullptr;
4059
4060 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4061 SmallSet<int64_t, 8> SeenIdx;
4062
4063 // Ensure each load is in the same MBB.
4064 // TODO: Support multiple MachineBasicBlocks.
4065 MachineBasicBlock *MBB = nullptr;
4066 const MachineMemOperand *MMO = nullptr;
4067
4068 // Earliest instruction-order load in the pattern.
4069 GZExtLoad *EarliestLoad = nullptr;
4070
4071 // Latest instruction-order load in the pattern.
4072 GZExtLoad *LatestLoad = nullptr;
4073
4074 // Base pointer which every load should share.
4076
4077 // We want to find a load for each register. Each load should have some
4078 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4079 // track of the load which uses the lowest index. Later, we will check if we
4080 // can use its pointer in the final, combined load.
4081 for (auto Reg : RegsToVisit) {
4082 // Find the load, and find the position that it will end up in (e.g. a
4083 // shifted value).
4084 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4085 if (!LoadAndPos)
4086 return std::nullopt;
4087 GZExtLoad *Load;
4088 int64_t DstPos;
4089 std::tie(Load, DstPos) = *LoadAndPos;
4090
4091 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4092 // it is difficult to check for stores/calls/etc between loads.
4093 MachineBasicBlock *LoadMBB = Load->getParent();
4094 if (!MBB)
4095 MBB = LoadMBB;
4096 if (LoadMBB != MBB)
4097 return std::nullopt;
4098
4099 // Make sure that the MachineMemOperands of every seen load are compatible.
4100 auto &LoadMMO = Load->getMMO();
4101 if (!MMO)
4102 MMO = &LoadMMO;
4103 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4104 return std::nullopt;
4105
4106 // Find out what the base pointer and index for the load is.
4107 Register LoadPtr;
4108 int64_t Idx;
4109 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4110 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4111 LoadPtr = Load->getOperand(1).getReg();
4112 Idx = 0;
4113 }
4114
4115 // Don't combine things like a[i], a[i] -> a bigger load.
4116 if (!SeenIdx.insert(Idx).second)
4117 return std::nullopt;
4118
4119 // Every load must share the same base pointer; don't combine things like:
4120 //
4121 // a[i], b[i + 1] -> a bigger load.
4122 if (!BasePtr.isValid())
4123 BasePtr = LoadPtr;
4124 if (BasePtr != LoadPtr)
4125 return std::nullopt;
4126
4127 if (Idx < LowestIdx) {
4128 LowestIdx = Idx;
4129 LowestIdxLoad = Load;
4130 }
4131
4132 // Keep track of the byte offset that this load ends up at. If we have seen
4133 // the byte offset, then stop here. We do not want to combine:
4134 //
4135 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4136 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4137 return std::nullopt;
4138 Loads.insert(Load);
4139
4140 // Keep track of the position of the earliest/latest loads in the pattern.
4141 // We will check that there are no load fold barriers between them later
4142 // on.
4143 //
4144 // FIXME: Is there a better way to check for load fold barriers?
4145 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4146 EarliestLoad = Load;
4147 if (!LatestLoad || dominates(*LatestLoad, *Load))
4148 LatestLoad = Load;
4149 }
4150
4151 // We found a load for each register. Let's check if each load satisfies the
4152 // pattern.
4153 assert(Loads.size() == RegsToVisit.size() &&
4154 "Expected to find a load for each register?");
4155 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4156 LatestLoad && "Expected at least two loads?");
4157
4158 // Check if there are any stores, calls, etc. between any of the loads. If
4159 // there are, then we can't safely perform the combine.
4160 //
4161 // MaxIter is chosen based off the (worst case) number of iterations it
4162 // typically takes to succeed in the LLVM test suite plus some padding.
4163 //
4164 // FIXME: Is there a better way to check for load fold barriers?
4165 const unsigned MaxIter = 20;
4166 unsigned Iter = 0;
4167 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4168 LatestLoad->getIterator())) {
4169 if (Loads.count(&MI))
4170 continue;
4171 if (MI.isLoadFoldBarrier())
4172 return std::nullopt;
4173 if (Iter++ == MaxIter)
4174 return std::nullopt;
4175 }
4176
4177 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4178}
4179
4182 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4183 assert(MI.getOpcode() == TargetOpcode::G_OR);
4184 MachineFunction &MF = *MI.getMF();
4185 // Assuming a little-endian target, transform:
4186 // s8 *a = ...
4187 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4188 // =>
4189 // s32 val = *((s32)a)
4190 //
4191 // s8 *a = ...
4192 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4193 // =>
4194 // s32 val = BSWAP(*((s32)a))
4195 Register Dst = MI.getOperand(0).getReg();
4196 LLT Ty = MRI.getType(Dst);
4197 if (Ty.isVector())
4198 return false;
4199
4200 // We need to combine at least two loads into this type. Since the smallest
4201 // possible load is into a byte, we need at least a 16-bit wide type.
4202 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4203 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4204 return false;
4205
4206 // Match a collection of non-OR instructions in the pattern.
4207 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4208 if (!RegsToVisit)
4209 return false;
4210
4211 // We have a collection of non-OR instructions. Figure out how wide each of
4212 // the small loads should be based off of the number of potential loads we
4213 // found.
4214 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4215 if (NarrowMemSizeInBits % 8 != 0)
4216 return false;
4217
4218 // Check if each register feeding into each OR is a load from the same
4219 // base pointer + some arithmetic.
4220 //
4221 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4222 //
4223 // Also verify that each of these ends up putting a[i] into the same memory
4224 // offset as a load into a wide type would.
4226 GZExtLoad *LowestIdxLoad, *LatestLoad;
4227 int64_t LowestIdx;
4228 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4229 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4230 if (!MaybeLoadInfo)
4231 return false;
4232 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4233
4234 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4235 // we found before, check if this corresponds to a big or little endian byte
4236 // pattern. If it does, then we can represent it using a load + possibly a
4237 // BSWAP.
4238 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4239 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4240 if (!IsBigEndian)
4241 return false;
4242 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4243 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4244 return false;
4245
4246 // Make sure that the load from the lowest index produces offset 0 in the
4247 // final value.
4248 //
4249 // This ensures that we won't combine something like this:
4250 //
4251 // load x[i] -> byte 2
4252 // load x[i+1] -> byte 0 ---> wide_load x[i]
4253 // load x[i+2] -> byte 1
4254 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4255 const unsigned ZeroByteOffset =
4256 *IsBigEndian
4257 ? bigEndianByteAt(NumLoadsInTy, 0)
4258 : littleEndianByteAt(NumLoadsInTy, 0);
4259 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4260 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4261 ZeroOffsetIdx->second != LowestIdx)
4262 return false;
4263
4264 // We will reuse the pointer from the load which ends up at byte offset 0. It
4265 // may not use index 0.
4266 Register Ptr = LowestIdxLoad->getPointerReg();
4267 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
4268 LegalityQuery::MemDesc MMDesc(MMO);
4269 MMDesc.MemoryTy = Ty;
4271 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4272 return false;
4273 auto PtrInfo = MMO.getPointerInfo();
4274 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4275
4276 // Load must be allowed and fast on the target.
4278 auto &DL = MF.getDataLayout();
4279 unsigned Fast = 0;
4280 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4281 !Fast)
4282 return false;
4283
4284 MatchInfo = [=](MachineIRBuilder &MIB) {
4285 MIB.setInstrAndDebugLoc(*LatestLoad);
4286 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4287 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4288 if (NeedsBSwap)
4289 MIB.buildBSwap(Dst, LoadDst);
4290 };
4291 return true;
4292}
4293
4295 MachineInstr *&ExtMI) const {
4296 auto &PHI = cast<GPhi>(MI);
4297 Register DstReg = PHI.getReg(0);
4298
4299 // TODO: Extending a vector may be expensive, don't do this until heuristics
4300 // are better.
4301 if (MRI.getType(DstReg).isVector())
4302 return false;
4303
4304 // Try to match a phi, whose only use is an extend.
4305 if (!MRI.hasOneNonDBGUse(DstReg))
4306 return false;
4307 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4308 switch (ExtMI->getOpcode()) {
4309 case TargetOpcode::G_ANYEXT:
4310 return true; // G_ANYEXT is usually free.
4311 case TargetOpcode::G_ZEXT:
4312 case TargetOpcode::G_SEXT:
4313 break;
4314 default:
4315 return false;
4316 }
4317
4318 // If the target is likely to fold this extend away, don't propagate.
4319 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4320 return false;
4321
4322 // We don't want to propagate the extends unless there's a good chance that
4323 // they'll be optimized in some way.
4324 // Collect the unique incoming values.
4326 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4327 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4328 switch (DefMI->getOpcode()) {
4329 case TargetOpcode::G_LOAD:
4330 case TargetOpcode::G_TRUNC:
4331 case TargetOpcode::G_SEXT:
4332 case TargetOpcode::G_ZEXT:
4333 case TargetOpcode::G_ANYEXT:
4334 case TargetOpcode::G_CONSTANT:
4335 InSrcs.insert(DefMI);
4336 // Don't try to propagate if there are too many places to create new
4337 // extends; chances are it'll increase code size.
4338 if (InSrcs.size() > 2)
4339 return false;
4340 break;
4341 default:
4342 return false;
4343 }
4344 }
4345 return true;
4346}
4347
4349 MachineInstr *&ExtMI) const {
4350 auto &PHI = cast<GPhi>(MI);
4351 Register DstReg = ExtMI->getOperand(0).getReg();
4352 LLT ExtTy = MRI.getType(DstReg);
4353
4354 // Propagate the extension into each incoming register's defining block.
4355 // Use a SetVector here because PHIs can have duplicate edges, and we want
4356 // deterministic iteration order.
4359 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4360 auto SrcReg = PHI.getIncomingValue(I);
4361 auto *SrcMI = MRI.getVRegDef(SrcReg);
4362 if (!SrcMIs.insert(SrcMI))
4363 continue;
4364
4365 // Build an extend after each src inst.
4366 auto *MBB = SrcMI->getParent();
4367 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4368 if (InsertPt != MBB->end() && InsertPt->isPHI())
4369 InsertPt = MBB->getFirstNonPHI();
4370
4371 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4372 Builder.setDebugLoc(MI.getDebugLoc());
4373 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4374 OldToNewSrcMap[SrcMI] = NewExt;
4375 }
4376
4377 // Create a new phi with the extended inputs.
4378 Builder.setInstrAndDebugLoc(MI);
4379 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4380 NewPhi.addDef(DstReg);
4381 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4382 if (!MO.isReg()) {
4383 NewPhi.addMBB(MO.getMBB());
4384 continue;
4385 }
4386 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4387 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4388 }
4389 Builder.insertInstr(NewPhi);
4390 ExtMI->eraseFromParent();
4391}
4392
4394 Register &Reg) const {
4395 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4396 // If we have a constant index, look for a G_BUILD_VECTOR source
4397 // and find the source register that the index maps to.
4398 Register SrcVec = MI.getOperand(1).getReg();
4399 LLT SrcTy = MRI.getType(SrcVec);
4400 if (SrcTy.isScalableVector())
4401 return false;
4402
4403 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4404 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4405 return false;
4406
4407 unsigned VecIdx = Cst->Value.getZExtValue();
4408
4409 // Check if we have a build_vector or build_vector_trunc with an optional
4410 // trunc in front.
4411 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4412 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4413 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4414 }
4415
4416 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4417 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4418 return false;
4419
4420 EVT Ty(getMVTForLLT(SrcTy));
4421 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4422 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4423 return false;
4424
4425 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4426 return true;
4427}
4428
4430 Register &Reg) const {
4431 // Check the type of the register, since it may have come from a
4432 // G_BUILD_VECTOR_TRUNC.
4433 LLT ScalarTy = MRI.getType(Reg);
4434 Register DstReg = MI.getOperand(0).getReg();
4435 LLT DstTy = MRI.getType(DstReg);
4436
4437 if (ScalarTy != DstTy) {
4438 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4439 Builder.buildTrunc(DstReg, Reg);
4440 MI.eraseFromParent();
4441 return;
4442 }
4444}
4445
4448 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4449 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4450 // This combine tries to find build_vector's which have every source element
4451 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4452 // the masked load scalarization is run late in the pipeline. There's already
4453 // a combine for a similar pattern starting from the extract, but that
4454 // doesn't attempt to do it if there are multiple uses of the build_vector,
4455 // which in this case is true. Starting the combine from the build_vector
4456 // feels more natural than trying to find sibling nodes of extracts.
4457 // E.g.
4458 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4459 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4460 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4461 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4462 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4463 // ==>
4464 // replace ext{1,2,3,4} with %s{1,2,3,4}
4465
4466 Register DstReg = MI.getOperand(0).getReg();
4467 LLT DstTy = MRI.getType(DstReg);
4468 unsigned NumElts = DstTy.getNumElements();
4469
4470 SmallBitVector ExtractedElts(NumElts);
4471 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4472 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4473 return false;
4474 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4475 if (!Cst)
4476 return false;
4477 unsigned Idx = Cst->getZExtValue();
4478 if (Idx >= NumElts)
4479 return false; // Out of range.
4480 ExtractedElts.set(Idx);
4481 SrcDstPairs.emplace_back(
4482 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4483 }
4484 // Match if every element was extracted.
4485 return ExtractedElts.all();
4486}
4487
4490 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4491 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4492 for (auto &Pair : SrcDstPairs) {
4493 auto *ExtMI = Pair.second;
4494 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4495 ExtMI->eraseFromParent();
4496 }
4497 MI.eraseFromParent();
4498}
4499
4502 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4503 applyBuildFnNoErase(MI, MatchInfo);
4504 MI.eraseFromParent();
4505}
4506
4509 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4510 MatchInfo(Builder);
4511}
4512
4514 bool AllowScalarConstants,
4515 BuildFnTy &MatchInfo) const {
4516 assert(MI.getOpcode() == TargetOpcode::G_OR);
4517
4518 Register Dst = MI.getOperand(0).getReg();
4519 LLT Ty = MRI.getType(Dst);
4520 unsigned BitWidth = Ty.getScalarSizeInBits();
4521
4522 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4523 unsigned FshOpc = 0;
4524
4525 // Match (or (shl ...), (lshr ...)).
4526 if (!mi_match(Dst, MRI,
4527 // m_GOr() handles the commuted version as well.
4528 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4529 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4530 return false;
4531
4532 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4533 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4534 int64_t CstShlAmt = 0, CstLShrAmt;
4535 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4536 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4537 CstShlAmt + CstLShrAmt == BitWidth) {
4538 FshOpc = TargetOpcode::G_FSHR;
4539 Amt = LShrAmt;
4540 } else if (mi_match(LShrAmt, MRI,
4542 ShlAmt == Amt) {
4543 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4544 FshOpc = TargetOpcode::G_FSHL;
4545 } else if (mi_match(ShlAmt, MRI,
4547 LShrAmt == Amt) {
4548 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4549 FshOpc = TargetOpcode::G_FSHR;
4550 } else {
4551 return false;
4552 }
4553
4554 LLT AmtTy = MRI.getType(Amt);
4555 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
4556 (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
4557 return false;
4558
4559 MatchInfo = [=](MachineIRBuilder &B) {
4560 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4561 };
4562 return true;
4563}
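// Illustrative example (not from the source): on s32,
//   %r = G_OR (G_SHL %x, 8), (G_LSHR %y, 24)
// has shift constants summing to the bit width, so it is matched as
//   %r = G_FSHR %x, %y, 24
// which is equivalent to G_FSHL %x, %y, 8.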
4564
4565/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4567 unsigned Opc = MI.getOpcode();
4568 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4569 Register X = MI.getOperand(1).getReg();
4570 Register Y = MI.getOperand(2).getReg();
4571 if (X != Y)
4572 return false;
4573 unsigned RotateOpc =
4574 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4575 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4576}
4577
4579 unsigned Opc = MI.getOpcode();
4580 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4581 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4582 Observer.changingInstr(MI);
4583 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4584 : TargetOpcode::G_ROTR));
4585 MI.removeOperand(2);
4586 Observer.changedInstr(MI);
4587}
4588
4589// Fold (rot x, c) -> (rot x, c % BitSize)
4591 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4592 MI.getOpcode() == TargetOpcode::G_ROTR);
4593 unsigned Bitsize =
4594 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4595 Register AmtReg = MI.getOperand(2).getReg();
4596 bool OutOfRange = false;
4597 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4598 if (auto *CI = dyn_cast<ConstantInt>(C))
4599 OutOfRange |= CI->getValue().uge(Bitsize);
4600 return true;
4601 };
4602 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4603}
4604
4606 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4607 MI.getOpcode() == TargetOpcode::G_ROTR);
4608 unsigned Bitsize =
4609 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4610 Register Amt = MI.getOperand(2).getReg();
4611 LLT AmtTy = MRI.getType(Amt);
4612 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4613 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4614 Observer.changingInstr(MI);
4615 MI.getOperand(2).setReg(Amt);
4616 Observer.changedInstr(MI);
4617}
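// Illustrative example (not from the source): G_ROTL %x:_(s32), 37 has an
// out-of-range amount; the G_UREM by the bit size built above reduces it to
// G_ROTL %x, 5.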
4618
4620 int64_t &MatchInfo) const {
4621 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4622 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4623
4624 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4625 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4626 // KnownBits on the LHS in two cases:
4627 //
4628 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4629 // we cannot do any transforms so we can safely bail out early.
4630 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4631 // >=0.
4632 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4633 if (KnownRHS.isUnknown())
4634 return false;
4635
4636 std::optional<bool> KnownVal;
4637 if (KnownRHS.isZero()) {
4638 // ? uge 0 -> always true
4639 // ? ult 0 -> always false
4640 if (Pred == CmpInst::ICMP_UGE)
4641 KnownVal = true;
4642 else if (Pred == CmpInst::ICMP_ULT)
4643 KnownVal = false;
4644 }
4645
4646 if (!KnownVal) {
4647 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4648 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4649 }
4650
4651 if (!KnownVal)
4652 return false;
4653 MatchInfo =
4654 *KnownVal
4656 /*IsVector = */
4657 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4658 /* IsFP = */ false)
4659 : 0;
4660 return true;
4661}
4662
4665 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4666 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4667 // Given:
4668 //
4669 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4670 // %cmp = G_ICMP ne %x, 0
4671 //
4672 // Or:
4673 //
4674 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4675 // %cmp = G_ICMP eq %x, 1
4676 //
4677 // We can replace %cmp with %x assuming true is 1 on the target.
4678 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4679 if (!CmpInst::isEquality(Pred))
4680 return false;
4681 Register Dst = MI.getOperand(0).getReg();
4682 LLT DstTy = MRI.getType(Dst);
4684 /* IsFP = */ false) != 1)
4685 return false;
4686 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4687 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4688 return false;
4689 Register LHS = MI.getOperand(2).getReg();
4690 auto KnownLHS = VT->getKnownBits(LHS);
4691 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4692 return false;
4693 // Make sure replacing Dst with the LHS is a legal operation.
4694 LLT LHSTy = MRI.getType(LHS);
4695 unsigned LHSSize = LHSTy.getSizeInBits();
4696 unsigned DstSize = DstTy.getSizeInBits();
4697 unsigned Op = TargetOpcode::COPY;
4698 if (DstSize != LHSSize)
4699 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4700 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4701 return false;
4702 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4703 return true;
4704}
4705
4706// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4709 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4710 assert(MI.getOpcode() == TargetOpcode::G_AND);
4711
4712 // Ignore vector types to simplify matching the two constants.
4713 // TODO: do this for vectors and scalars via a demanded bits analysis.
4714 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4715 if (Ty.isVector())
4716 return false;
4717
4718 Register Src;
4719 Register AndMaskReg;
4720 int64_t AndMaskBits;
4721 int64_t OrMaskBits;
4722 if (!mi_match(MI, MRI,
4723 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4724 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4725 return false;
4726
4727 // Check if OrMask could turn on any bits in Src.
4728 if (AndMaskBits & OrMaskBits)
4729 return false;
4730
4731 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4732 Observer.changingInstr(MI);
4733 // Canonicalize the result to have the constant on the RHS.
4734 if (MI.getOperand(1).getReg() == AndMaskReg)
4735 MI.getOperand(2).setReg(AndMaskReg);
4736 MI.getOperand(1).setReg(Src);
4737 Observer.changedInstr(MI);
4738 };
4739 return true;
4740}
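// Illustrative example (not from the source): with c1 = 0xF0 and c2 = 0x0F
// the masks are disjoint (0xF0 & 0x0F == 0), so
//   %r = G_AND (G_OR %x, 0xF0), 0x0F
// is rewritten in place to
//   %r = G_AND %x, 0x0F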
4741
4742/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4745 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4746 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4747 Register Dst = MI.getOperand(0).getReg();
4748 Register Src = MI.getOperand(1).getReg();
4749 LLT Ty = MRI.getType(Src);
4751 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4752 return false;
4753 int64_t Width = MI.getOperand(2).getImm();
4754 Register ShiftSrc;
4755 int64_t ShiftImm;
4756 if (!mi_match(
4757 Src, MRI,
4758 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4759 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4760 return false;
4761 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4762 return false;
4763
4764 MatchInfo = [=](MachineIRBuilder &B) {
4765 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4766 auto Cst2 = B.buildConstant(ExtractTy, Width);
4767 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4768 };
4769 return true;
4770}
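// Illustrative example (not from the source): on s32,
//   %s = G_LSHR %x, 4
//   %r = G_SEXT_INREG %s, 8
// becomes G_SBFX %x, 4, 8, i.e. a sign-extending extract of 8 bits starting
// at bit 4.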
4771
4772/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4774 BuildFnTy &MatchInfo) const {
4775 GAnd *And = cast<GAnd>(&MI);
4776 Register Dst = And->getReg(0);
4777 LLT Ty = MRI.getType(Dst);
4779 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4780 // into account.
4781 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4782 return false;
4783
4784 int64_t AndImm, LSBImm;
4785 Register ShiftSrc;
4786 const unsigned Size = Ty.getScalarSizeInBits();
4787 if (!mi_match(And->getReg(0), MRI,
4788 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4789 m_ICst(AndImm))))
4790 return false;
4791
4792 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4793 auto MaybeMask = static_cast<uint64_t>(AndImm);
4794 if (MaybeMask & (MaybeMask + 1))
4795 return false;
4796
4797 // LSB must fit within the register.
4798 if (static_cast<uint64_t>(LSBImm) >= Size)
4799 return false;
4800
4801 uint64_t Width = APInt(Size, AndImm).countr_one();
4802 MatchInfo = [=](MachineIRBuilder &B) {
4803 auto WidthCst = B.buildConstant(ExtractTy, Width);
4804 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4805 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4806 };
4807 return true;
4808}
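// Illustrative example (not from the source): on s32,
//   %r = G_AND (G_LSHR %x, 8), 0xFF
// matches with LSB = 8 and Width = 8 (0xFF is a mask of the low 8 bits), and
// is rewritten to G_UBFX %x, 8, 8.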
4809
4812 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4813 const unsigned Opcode = MI.getOpcode();
4814 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4815
4816 const Register Dst = MI.getOperand(0).getReg();
4817
4818 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4819 ? TargetOpcode::G_SBFX
4820 : TargetOpcode::G_UBFX;
4821
4822 // Check if the type we would use for the extract is legal
4823 LLT Ty = MRI.getType(Dst);
4825 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4826 return false;
4827
4828 Register ShlSrc;
4829 int64_t ShrAmt;
4830 int64_t ShlAmt;
4831 const unsigned Size = Ty.getScalarSizeInBits();
4832
4833 // Try to match shr (shl x, c1), c2
4834 if (!mi_match(Dst, MRI,
4835 m_BinOp(Opcode,
4836 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4837 m_ICst(ShrAmt))))
4838 return false;
4839
4840 // Make sure that the shift sizes can fit a bitfield extract
4841 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4842 return false;
4843
4844 // Skip this combine if the G_SEXT_INREG combine could handle it
4845 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4846 return false;
4847
4848 // Calculate start position and width of the extract
4849 const int64_t Pos = ShrAmt - ShlAmt;
4850 const int64_t Width = Size - ShrAmt;
4851
4852 MatchInfo = [=](MachineIRBuilder &B) {
4853 auto WidthCst = B.buildConstant(ExtractTy, Width);
4854 auto PosCst = B.buildConstant(ExtractTy, Pos);
4855 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4856 };
4857 return true;
4858}
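// Illustrative example (not from the source): on s32,
//   %r = G_LSHR (G_SHL %x, 4), 8
// gives Pos = 8 - 4 = 4 and Width = 32 - 8 = 24, so it is rewritten to
//   %r = G_UBFX %x, 4, 24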
4859
4862 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4863 const unsigned Opcode = MI.getOpcode();
4864 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4865
4866 const Register Dst = MI.getOperand(0).getReg();
4867 LLT Ty = MRI.getType(Dst);
4869 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4870 return false;
4871
4872 // Try to match shr (and x, c1), c2
4873 Register AndSrc;
4874 int64_t ShrAmt;
4875 int64_t SMask;
4876 if (!mi_match(Dst, MRI,
4877 m_BinOp(Opcode,
4878 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4879 m_ICst(ShrAmt))))
4880 return false;
4881
4882 const unsigned Size = Ty.getScalarSizeInBits();
4883 if (ShrAmt < 0 || ShrAmt >= Size)
4884 return false;
4885
4886 // If the shift subsumes the mask, emit the 0 directly.
4887 if (0 == (SMask >> ShrAmt)) {
4888 MatchInfo = [=](MachineIRBuilder &B) {
4889 B.buildConstant(Dst, 0);
4890 };
4891 return true;
4892 }
4893
4894 // Check that ubfx can do the extraction, with no holes in the mask.
4895 uint64_t UMask = SMask;
4896 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4898 if (!isMask_64(UMask))
4899 return false;
4900
4901 // Calculate start position and width of the extract.
4902 const int64_t Pos = ShrAmt;
4903 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4904
4905 // It's preferable to keep the shift, rather than form G_SBFX.
4906 // TODO: remove the G_AND via demanded bits analysis.
4907 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4908 return false;
4909
4910 MatchInfo = [=](MachineIRBuilder &B) {
4911 auto WidthCst = B.buildConstant(ExtractTy, Width);
4912 auto PosCst = B.buildConstant(ExtractTy, Pos);
4913 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4914 };
4915 return true;
4916}
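// Illustrative example (assumed values, not upstream text): on s32,
//   %r = G_LSHR (G_AND %x, 0x0FF0), 4
// extends the mask to UMask = 0x0FFF (no holes), so Pos = 4 and
// Width = 12 - 4 = 8, and the result is G_UBFX %x, 4, 8.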
4917
4918bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4919 MachineInstr &MI) const {
4920 auto &PtrAdd = cast<GPtrAdd>(MI);
4921
4922 Register Src1Reg = PtrAdd.getBaseReg();
4923 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4924 if (!Src1Def)
4925 return false;
4926
4927 Register Src2Reg = PtrAdd.getOffsetReg();
4928
4929 if (MRI.hasOneNonDBGUse(Src1Reg))
4930 return false;
4931
4932 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4933 if (!C1)
4934 return false;
4935 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4936 if (!C2)
4937 return false;
4938
4939 const APInt &C1APIntVal = *C1;
4940 const APInt &C2APIntVal = *C2;
4941 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4942
4943 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4944 // This combine may end up running before ptrtoint/inttoptr combines
4945 // manage to eliminate redundant conversions, so try to look through them.
4946 MachineInstr *ConvUseMI = &UseMI;
4947 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4948 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4949 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4950 Register DefReg = ConvUseMI->getOperand(0).getReg();
4951 if (!MRI.hasOneNonDBGUse(DefReg))
4952 break;
4953 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4954 ConvUseOpc = ConvUseMI->getOpcode();
4955 }
4956 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4957 if (!LdStMI)
4958 continue;
4959 // Is x[offset2] already not a legal addressing mode? If so then
4960 // reassociating the constants breaks nothing (we test offset2 because
4961 // that's the one we hope to fold into the load or store).
4962 TargetLoweringBase::AddrMode AM;
4963 AM.HasBaseReg = true;
4964 AM.BaseOffs = C2APIntVal.getSExtValue();
4965 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4966 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4967 PtrAdd.getMF()->getFunction().getContext());
4968 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4969 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4970 AccessTy, AS))
4971 continue;
4972
4973 // Would x[offset1+offset2] still be a legal addressing mode?
4974 AM.BaseOffs = CombinedValue;
4975 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4976 AccessTy, AS))
4977 return true;
4978 }
4979
4980 return false;
4981}
4982
4984 MachineInstr *RHS,
4985 BuildFnTy &MatchInfo) const {
4986 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4987 Register Src1Reg = MI.getOperand(1).getReg();
4988 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4989 return false;
4990 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4991 if (!C2)
4992 return false;
4993
4994 // If both additions are nuw, the reassociated additions are also nuw.
4995 // If the original G_PTR_ADD is additionally nusw, X and C are both not
4996 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
4997 // therefore also nusw.
4998 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
4999 // the new G_PTR_ADDs are then also inbounds.
5000 unsigned PtrAddFlags = MI.getFlags();
5001 unsigned AddFlags = RHS->getFlags();
5002 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
5003 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
5004 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
5005 unsigned Flags = 0;
5006 if (IsNoUWrap)
5008 if (IsNoUSWrap)
5010 if (IsInBounds)
5012
5013 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5014 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
5015
5016 auto NewBase =
5017 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
5018 Observer.changingInstr(MI);
5019 MI.getOperand(1).setReg(NewBase.getReg(0));
5020 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
5021 MI.setFlags(Flags);
5022 Observer.changedInstr(MI);
5023 };
5024 return !reassociationCanBreakAddressingModePattern(MI);
5025}
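// Illustrative example (not from the source):
//   %sum = G_ADD %x, 16
//   %p   = G_PTR_ADD %base, %sum
// is reassociated to
//   %tmp = G_PTR_ADD %base, %x
//   %p   = G_PTR_ADD %tmp, 16
// so the constant offset ends up directly on the outer G_PTR_ADD, where a
// load/store user may be able to fold it into its addressing mode.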
5026
5028 MachineInstr *LHS,
5029 MachineInstr *RHS,
5030 BuildFnTy &MatchInfo) const {
5031 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
5032 // if and only if (G_PTR_ADD X, C) has one use.
5033 Register LHSBase;
5034 std::optional<ValueAndVReg> LHSCstOff;
5035 if (!mi_match(MI.getBaseReg(), MRI,
5036 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
5037 return false;
5038
5039 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5040
5041 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5042 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5043 // so the new G_PTR_ADDs are also inbounds.
5044 unsigned PtrAddFlags = MI.getFlags();
5045 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5046 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5047 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5049 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5051 unsigned Flags = 0;
5052 if (IsNoUWrap)
5054 if (IsNoUSWrap)
5056 if (IsInBounds)
5058
5059 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5060 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5061 // before its def. Sink the instruction to just before the outer PTR_ADD to
5062 // ensure this doesn't happen.
5063 LHSPtrAdd->moveBefore(&MI);
5064 Register RHSReg = MI.getOffsetReg();
5065 // Reusing the source vreg directly could cause a type mismatch if it comes from an extend/trunc, so build a constant of the offset's type instead.
5066 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
5067 Observer.changingInstr(MI);
5068 MI.getOperand(2).setReg(NewCst.getReg(0));
5069 MI.setFlags(Flags);
5070 Observer.changedInstr(MI);
5071 Observer.changingInstr(*LHSPtrAdd);
5072 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5073 LHSPtrAdd->setFlags(Flags);
5074 Observer.changedInstr(*LHSPtrAdd);
5075 };
5076 return !reassociationCanBreakAddressingModePattern(MI);
5077}
5078
5079bool CombinerHelper::matchReassocFoldConstantsInSubTree(
5080 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
5081 BuildFnTy &MatchInfo) const {
5082 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5083 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5084 if (!LHSPtrAdd)
5085 return false;
5086
5087 Register Src2Reg = MI.getOperand(2).getReg();
5088 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5089 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5090 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5091 if (!C1)
5092 return false;
5093 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5094 if (!C2)
5095 return false;
5096
5097 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5098 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5099 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5100 // largest signed integer that fits into the index type, which is the maximum
5101 // size of allocated objects according to the IR Language Reference.
5102 unsigned PtrAddFlags = MI.getFlags();
5103 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5104 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5105 bool IsInBounds =
5106 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5107 unsigned Flags = 0;
5108 if (IsNoUWrap)
5109 Flags |= MachineInstr::MIFlag::NoUWrap;
5110 if (IsInBounds) {
5111 Flags |= MachineInstr::MIFlag::NoUSWrap;
5112 Flags |= MachineInstr::MIFlag::InBounds;
5113 }
5114
5115 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5116 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5117 Observer.changingInstr(MI);
5118 MI.getOperand(1).setReg(LHSSrc1);
5119 MI.getOperand(2).setReg(NewCst.getReg(0));
5120 MI.setFlags(Flags);
5121 Observer.changedInstr(MI);
5122 };
5123 return !reassociationCanBreakAddressingModePattern(MI);
5124}
5125
5126bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
5127 BuildFnTy &MatchInfo) const {
5128 auto &PtrAdd = cast<GPtrAdd>(MI);
5129 // We're trying to match a few pointer computation patterns here for
5130 // re-association opportunities.
5131 // 1) Isolating a constant operand to be on the RHS, e.g.:
5132 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5133 //
5134 // 2) Folding two constants in each sub-tree as long as such folding
5135 // doesn't break a legal addressing mode.
5136 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5137 //
5138 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5139 // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
5140 // iff (G_PTR_ADD X, C) has one use.
5141 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5142 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5143
5144 // Try to match example 2.
5145 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5146 return true;
5147
5148 // Try to match example 3.
5149 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5150 return true;
5151
5152 // Try to match example 1.
5153 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5154 return true;
5155
5156 return false;
5157}
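// Editor's note: an illustrative, standalone sketch (not part of the original
// file) of the three reassociation shapes matched above, written as plain
// unsigned integer arithmetic so the equivalences are easy to verify. All
// names below are hypothetical.
#include <cassert>
#include <cstdint>

static void reassocPtrAddSketch(uint64_t Base, uint64_t X, uint64_t Y,
                                uint64_t C1, uint64_t C2) {
  // 1) G_PTR_ADD(BASE, G_ADD(X, C1)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C1)
  assert(Base + (X + C1) == (Base + X) + C1);
  // 2) G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1 + C2)
  assert((Base + C1) + C2 == Base + (C1 + C2));
  // 3) G_PTR_ADD(G_PTR_ADD(BASE, C1), Y) -> G_PTR_ADD(G_PTR_ADD(BASE, Y), C1)
  assert((Base + C1) + Y == (Base + Y) + C1);
}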
5158bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
5159 Register OpLHS, Register OpRHS,
5160 BuildFnTy &MatchInfo) const {
5161 LLT OpRHSTy = MRI.getType(OpRHS);
5162 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5163
5164 if (OpLHSDef->getOpcode() != Opc)
5165 return false;
5166
5167 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5168 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5169 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5170
5171 // If the inner op is (X op C), pull the constant out so it can be folded with
5172 // other constants in the expression tree. Folding is not guaranteed so we
5173 // might have (C1 op C2). In that case do not pull a constant out because it
5174 // won't help and can lead to infinite loops.
5175 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5176 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5177 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5178 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5179 MatchInfo = [=](MachineIRBuilder &B) {
5180 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5181 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5182 };
5183 return true;
5184 }
5185 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5186 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5187 // iff (op x, c1) has one use
5188 MatchInfo = [=](MachineIRBuilder &B) {
5189 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5190 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5191 };
5192 return true;
5193 }
5194 }
5195
5196 return false;
5197}
5198
5199bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
5200 BuildFnTy &MatchInfo) const {
5201 // We don't check if the reassociation will break a legal addressing mode
5202 // here since pointer arithmetic is handled by G_PTR_ADD.
5203 unsigned Opc = MI.getOpcode();
5204 Register DstReg = MI.getOperand(0).getReg();
5205 Register LHSReg = MI.getOperand(1).getReg();
5206 Register RHSReg = MI.getOperand(2).getReg();
5207
5208 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5209 return true;
5210 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5211 return true;
5212 return false;
5213}
5214
5215bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI,
5216 APInt &MatchInfo) const {
5217 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5218 Register SrcOp = MI.getOperand(1).getReg();
5219
5220 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5221 MatchInfo = *MaybeCst;
5222 return true;
5223 }
5224
5225 return false;
5226}
5227
5228bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI,
5229 APInt &MatchInfo) const {
5230 Register Op1 = MI.getOperand(1).getReg();
5231 Register Op2 = MI.getOperand(2).getReg();
5232 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5233 if (!MaybeCst)
5234 return false;
5235 MatchInfo = *MaybeCst;
5236 return true;
5237}
5238
5239bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI,
5240 ConstantFP *&MatchInfo) const {
5241 Register Op1 = MI.getOperand(1).getReg();
5242 Register Op2 = MI.getOperand(2).getReg();
5243 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5244 if (!MaybeCst)
5245 return false;
5246 MatchInfo =
5247 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5248 return true;
5249}
5250
5251bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
5252 ConstantFP *&MatchInfo) const {
5253 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5254 MI.getOpcode() == TargetOpcode::G_FMAD);
5255 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5256
5257 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5258 if (!Op3Cst)
5259 return false;
5260
5261 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5262 if (!Op2Cst)
5263 return false;
5264
5265 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5266 if (!Op1Cst)
5267 return false;
5268
5269 APFloat Op1F = Op1Cst->getValueAPF();
5270 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5271 APFloat::rmNearestTiesToEven);
5272 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5273 return true;
5274}
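// Editor's note: standalone sketch (not part of the original file). Folding a
// constant G_FMA with APFloat::fusedMultiplyAdd evaluates the product and sum
// with a single rounding; std::fma shows the same behaviour for host doubles,
// which the check below contrasts with the separately rounded mul + add.
#include <cassert>
#include <cmath>

static void constantFoldFMASketch() {
  const double X = 1.0 + 0x1p-27; // exactly representable
  const double P = 1.0 + 0x1p-26; // X * X rounded to the nearest double
  assert(std::fma(X, X, -P) == 0x1p-54); // one rounding keeps the low term
  assert(X * X - P == 0.0);              // two roundings lose it
}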
5275
5276bool CombinerHelper::matchNarrowBinopFeedingAnd(
5277 MachineInstr &MI,
5278 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5279 // Look for a binop feeding into an AND with a mask:
5280 //
5281 // %add = G_ADD %lhs, %rhs
5282 // %and = G_AND %add, 000...11111111
5283 //
5284 // Check if it's possible to perform the binop at a narrower width and zext
5285 // back to the original width like so:
5286 //
5287 // %narrow_lhs = G_TRUNC %lhs
5288 // %narrow_rhs = G_TRUNC %rhs
5289 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5290 // %new_add = G_ZEXT %narrow_add
5291 // %and = G_AND %new_add, 000...11111111
5292 //
5293 // This can allow later combines to eliminate the G_AND if it turns out
5294 // that the mask is irrelevant.
5295 assert(MI.getOpcode() == TargetOpcode::G_AND);
5296 Register Dst = MI.getOperand(0).getReg();
5297 Register AndLHS = MI.getOperand(1).getReg();
5298 Register AndRHS = MI.getOperand(2).getReg();
5299 LLT WideTy = MRI.getType(Dst);
5300
5301 // If the potential binop has more than one use, then it's possible that one
5302 // of those uses will need its full width.
5303 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5304 return false;
5305
5306 // Check if the LHS feeding the AND is impacted by the high bits that we're
5307 // masking out.
5308 //
5309 // e.g. for 64-bit x, y:
5310 //
5311 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5312 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5313 if (!LHSInst)
5314 return false;
5315 unsigned LHSOpc = LHSInst->getOpcode();
5316 switch (LHSOpc) {
5317 default:
5318 return false;
5319 case TargetOpcode::G_ADD:
5320 case TargetOpcode::G_SUB:
5321 case TargetOpcode::G_MUL:
5322 case TargetOpcode::G_AND:
5323 case TargetOpcode::G_OR:
5324 case TargetOpcode::G_XOR:
5325 break;
5326 }
5327
5328 // Find the mask on the RHS.
5329 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5330 if (!Cst)
5331 return false;
5332 auto Mask = Cst->Value;
5333 if (!Mask.isMask())
5334 return false;
5335
5336 // No point in combining if there's nothing to truncate.
5337 unsigned NarrowWidth = Mask.countr_one();
5338 if (NarrowWidth == WideTy.getSizeInBits())
5339 return false;
5340 LLT NarrowTy = LLT::scalar(NarrowWidth);
5341
5342 // Check if adding the zext + truncates could be harmful.
5343 auto &MF = *MI.getMF();
5344 const auto &TLI = getTargetLowering();
5345 LLVMContext &Ctx = MF.getFunction().getContext();
5346 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5347 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5348 return false;
5349 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5350 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5351 return false;
5352 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5353 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5354 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5355 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5356 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5357 auto NarrowBinOp =
5358 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5359 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5360 Observer.changingInstr(MI);
5361 MI.getOperand(1).setReg(Ext.getReg(0));
5362 Observer.changedInstr(MI);
5363 };
5364 return true;
5365}
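// Editor's note: standalone sketch (not part of the original file) of the
// identity behind the narrowing combine above: when a G_AND keeps only the low
// N bits, the feeding add can be performed at N bits and zero-extended after.
#include <cassert>
#include <cstdint>

static void narrowAddUnderMaskSketch(uint64_t X, uint64_t Y) {
  const uint64_t Mask = 0xFFFF; // only the 16 low bits survive the G_AND
  uint64_t Wide = (X + Y) & Mask;
  uint16_t NarrowAdd = static_cast<uint16_t>(static_cast<uint16_t>(X) +
                                             static_cast<uint16_t>(Y));
  uint64_t Narrow = static_cast<uint64_t>(NarrowAdd) & Mask;
  assert(Wide == Narrow);
}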
5366
5367bool CombinerHelper::matchMulOBy2(MachineInstr &MI,
5368 BuildFnTy &MatchInfo) const {
5369 unsigned Opc = MI.getOpcode();
5370 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5371
5372 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5373 return false;
5374
5375 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5376 Observer.changingInstr(MI);
5377 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5378 : TargetOpcode::G_SADDO;
5379 MI.setDesc(Builder.getTII().get(NewOpc));
5380 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5381 Observer.changedInstr(MI);
5382 };
5383 return true;
5384}
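// Editor's note: standalone sketch (not part of the original file) showing why
// (G_UMULO x, 2) can be rewritten as (G_UADDO x, x): the result and the
// overflow bit agree. It relies on the GCC/Clang __builtin_*_overflow builtins.
#include <cassert>
#include <cstdint>

static void mulOBy2Sketch(uint32_t X) {
  uint32_t MulRes, AddRes;
  bool MulOv = __builtin_mul_overflow(X, 2u, &MulRes);
  bool AddOv = __builtin_add_overflow(X, X, &AddRes);
  assert(MulRes == AddRes && MulOv == AddOv);
}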
5385
5386bool CombinerHelper::matchMulOBy0(MachineInstr &MI,
5387 BuildFnTy &MatchInfo) const {
5388 // (G_*MULO x, 0) -> 0 + no carry out
5389 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5390 MI.getOpcode() == TargetOpcode::G_SMULO);
5391 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5392 return false;
5393 Register Dst = MI.getOperand(0).getReg();
5394 Register Carry = MI.getOperand(1).getReg();
5395 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5396 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5397 return false;
5398 MatchInfo = [=](MachineIRBuilder &B) {
5399 B.buildConstant(Dst, 0);
5400 B.buildConstant(Carry, 0);
5401 };
5402 return true;
5403}
5404
5405bool CombinerHelper::matchAddEToAddO(MachineInstr &MI,
5406 BuildFnTy &MatchInfo) const {
5407 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5408 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5409 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5410 MI.getOpcode() == TargetOpcode::G_SADDE ||
5411 MI.getOpcode() == TargetOpcode::G_USUBE ||
5412 MI.getOpcode() == TargetOpcode::G_SSUBE);
5413 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5414 return false;
5415 MatchInfo = [&](MachineIRBuilder &B) {
5416 unsigned NewOpcode;
5417 switch (MI.getOpcode()) {
5418 case TargetOpcode::G_UADDE:
5419 NewOpcode = TargetOpcode::G_UADDO;
5420 break;
5421 case TargetOpcode::G_SADDE:
5422 NewOpcode = TargetOpcode::G_SADDO;
5423 break;
5424 case TargetOpcode::G_USUBE:
5425 NewOpcode = TargetOpcode::G_USUBO;
5426 break;
5427 case TargetOpcode::G_SSUBE:
5428 NewOpcode = TargetOpcode::G_SSUBO;
5429 break;
5430 }
5431 Observer.changingInstr(MI);
5432 MI.setDesc(B.getTII().get(NewOpcode));
5433 MI.removeOperand(4);
5434 Observer.changedInstr(MI);
5435 };
5436 return true;
5437}
5438
5439bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
5440 BuildFnTy &MatchInfo) const {
5441 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5442 Register Dst = MI.getOperand(0).getReg();
5443 // (x + y) - z -> x (if y == z)
5444 // (x + y) - z -> y (if x == z)
5445 Register X, Y, Z;
5446 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5447 Register ReplaceReg;
5448 int64_t CstX, CstY;
5449 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5450 mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
5451 ReplaceReg = X;
5452 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5453 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5454 ReplaceReg = Y;
5455 if (ReplaceReg) {
5456 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5457 return true;
5458 }
5459 }
5460
5461 // x - (y + z) -> 0 - y (if x == z)
5462 // x - (y + z) -> 0 - z (if x == y)
5463 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5464 Register ReplaceReg;
5465 int64_t CstX;
5466 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5467 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5468 ReplaceReg = Y;
5469 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5470 mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
5471 ReplaceReg = Z;
5472 if (ReplaceReg) {
5473 MatchInfo = [=](MachineIRBuilder &B) {
5474 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5475 B.buildSub(Dst, Zero, ReplaceReg);
5476 };
5477 return true;
5478 }
5479 }
5480 return false;
5481}
5482
5483MachineInstr *CombinerHelper::buildUDivOrURemUsingMul(MachineInstr &MI) const {
5484 unsigned Opcode = MI.getOpcode();
5485 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5486 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5487 Register Dst = UDivorRem.getReg(0);
5488 Register LHS = UDivorRem.getReg(1);
5489 Register RHS = UDivorRem.getReg(2);
5490 LLT Ty = MRI.getType(Dst);
5491 LLT ScalarTy = Ty.getScalarType();
5492 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5493 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5494 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5495
5496 auto &MIB = Builder;
5497
5498 bool UseSRL = false;
5499 SmallVector<Register, 16> Shifts, Factors;
5500 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5501 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5502
5503 auto BuildExactUDIVPattern = [&](const Constant *C) {
5504 // Don't recompute inverses for each splat element.
5505 if (IsSplat && !Factors.empty()) {
5506 Shifts.push_back(Shifts[0]);
5507 Factors.push_back(Factors[0]);
5508 return true;
5509 }
5510
5511 auto *CI = cast<ConstantInt>(C);
5512 APInt Divisor = CI->getValue();
5513 unsigned Shift = Divisor.countr_zero();
5514 if (Shift) {
5515 Divisor.lshrInPlace(Shift);
5516 UseSRL = true;
5517 }
5518
5519 // Calculate the multiplicative inverse modulo BW.
5520 APInt Factor = Divisor.multiplicativeInverse();
5521 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5522 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5523 return true;
5524 };
5525
5526 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5527 // Collect all magic values from the build vector.
5528 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5529 llvm_unreachable("Expected unary predicate match to succeed");
5530
5531 Register Shift, Factor;
5532 if (Ty.isVector()) {
5533 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5534 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5535 } else {
5536 Shift = Shifts[0];
5537 Factor = Factors[0];
5538 }
5539
5540 Register Res = LHS;
5541
5542 if (UseSRL)
5543 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5544
5545 return MIB.buildMul(Ty, Res, Factor);
5546 }
5547
5548 unsigned KnownLeadingZeros =
5549 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5550
5551 bool UseNPQ = false;
5552 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5553 auto BuildUDIVPattern = [&](const Constant *C) {
5554 auto *CI = cast<ConstantInt>(C);
5555 const APInt &Divisor = CI->getValue();
5556
5557 bool SelNPQ = false;
5558 APInt Magic(Divisor.getBitWidth(), 0);
5559 unsigned PreShift = 0, PostShift = 0;
5560
5561 // Magic algorithm doesn't work for division by 1. We need to emit a select
5562 // at the end.
5563 // TODO: Use undef values for divisor of 1.
5564 if (!Divisor.isOne()) {
5565
5566 // UnsignedDivisionByConstantInfo doesn't work correctly if the leading zeros
5567 // in the dividend exceed the leading zeros of the divisor.
5568 UnsignedDivisionByConstantInfo magics =
5569 UnsignedDivisionByConstantInfo::get(
5570 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5571
5572 Magic = std::move(magics.Magic);
5573
5574 assert(magics.PreShift < Divisor.getBitWidth() &&
5575 "We shouldn't generate an undefined shift!");
5576 assert(magics.PostShift < Divisor.getBitWidth() &&
5577 "We shouldn't generate an undefined shift!");
5578 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5579 PreShift = magics.PreShift;
5580 PostShift = magics.PostShift;
5581 SelNPQ = magics.IsAdd;
5582 }
5583
5584 PreShifts.push_back(
5585 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5586 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5587 NPQFactors.push_back(
5588 MIB.buildConstant(ScalarTy,
5589 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5590 : APInt::getZero(EltBits))
5591 .getReg(0));
5592 PostShifts.push_back(
5593 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5594 UseNPQ |= SelNPQ;
5595 return true;
5596 };
5597
5598 // Collect the shifts/magic values from each element.
5599 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5600 (void)Matched;
5601 assert(Matched && "Expected unary predicate match to succeed");
5602
5603 Register PreShift, PostShift, MagicFactor, NPQFactor;
5604 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5605 if (RHSDef) {
5606 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5607 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5608 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5609 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5610 } else {
5611 assert(MRI.getType(RHS).isScalar() &&
5612 "Non-build_vector operation should have been a scalar");
5613 PreShift = PreShifts[0];
5614 MagicFactor = MagicFactors[0];
5615 PostShift = PostShifts[0];
5616 }
5617
5618 Register Q = LHS;
5619 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5620
5621 // Multiply the numerator (operand 0) by the magic value.
5622 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5623
5624 if (UseNPQ) {
5625 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5626
5627 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5628 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5629 if (Ty.isVector())
5630 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5631 else
5632 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5633
5634 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5635 }
5636
5637 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5638 auto One = MIB.buildConstant(Ty, 1);
5639 auto IsOne = MIB.buildICmp(
5640 CmpInst::Predicate::ICMP_EQ,
5641 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5642 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5643
5644 if (Opcode == TargetOpcode::G_UREM) {
5645 auto Prod = MIB.buildMul(Ty, ret, RHS);
5646 return MIB.buildSub(Ty, LHS, Prod);
5647 }
5648 return ret;
5649}
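// Editor's note: standalone sketch (not part of the original file) of the
// exact-division fast path above: an exact udiv by D becomes a logical shift
// by countr_zero(D) followed by a multiply with the odd part's multiplicative
// inverse modulo 2^32. The Newton iteration below is one standard way to
// obtain that inverse; the helper names are hypothetical.
#include <cassert>
#include <cstdint>

static uint32_t inverseMod2Pow32(uint32_t OddD) {
  uint32_t X = OddD; // already the correct inverse modulo 2^3 for any odd OddD
  for (int I = 0; I < 4; ++I)
    X *= 2 - OddD * X; // each step doubles the number of correct low bits
  return X;            // OddD * X == 1 (mod 2^32)
}

static void exactUDivSketch() {
  const uint32_t D = 24;         // divisor, 3 * 2^3
  const uint32_t N = D * 12345u; // dividend known to be exactly divisible
  const unsigned Shift = 3;      // countr_zero(D)
  const uint32_t Factor = inverseMod2Pow32(D >> Shift);
  assert(((N >> Shift) * Factor) == N / D);
}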
5650
5651bool CombinerHelper::matchUDivOrURemByConst(MachineInstr &MI) const {
5652 unsigned Opcode = MI.getOpcode();
5653 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5654 Register Dst = MI.getOperand(0).getReg();
5655 Register RHS = MI.getOperand(2).getReg();
5656 LLT DstTy = MRI.getType(Dst);
5657
5658 auto &MF = *MI.getMF();
5659 AttributeList Attr = MF.getFunction().getAttributes();
5660 const auto &TLI = getTargetLowering();
5661 LLVMContext &Ctx = MF.getFunction().getContext();
5662 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5663 return false;
5664
5665 // Don't do this for minsize because the instruction sequence is usually
5666 // larger.
5667 if (MF.getFunction().hasMinSize())
5668 return false;
5669
5670 if (Opcode == TargetOpcode::G_UDIV &&
5671 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5672 return matchUnaryPredicate(
5673 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5674 }
5675
5676 auto *RHSDef = MRI.getVRegDef(RHS);
5677 if (!isConstantOrConstantVector(*RHSDef, MRI))
5678 return false;
5679
5680 // Don't do this if the types are not going to be legal.
5681 if (LI) {
5682 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5683 return false;
5684 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5685 return false;
5686 if (!isLegalOrBeforeLegalizer(
5687 {TargetOpcode::G_ICMP,
5688 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5689 DstTy}}))
5690 return false;
5691 if (Opcode == TargetOpcode::G_UREM &&
5692 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5693 return false;
5694 }
5695
5696 return matchUnaryPredicate(
5697 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5698}
5699
5700void CombinerHelper::applyUDivOrURemByConst(MachineInstr &MI) const {
5701 auto *NewMI = buildUDivOrURemUsingMul(MI);
5702 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5703}
5704
5705bool CombinerHelper::matchSDivOrSRemByConst(MachineInstr &MI) const {
5706 unsigned Opcode = MI.getOpcode();
5707 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5708 Register Dst = MI.getOperand(0).getReg();
5709 Register RHS = MI.getOperand(2).getReg();
5710 LLT DstTy = MRI.getType(Dst);
5711 auto SizeInBits = DstTy.getScalarSizeInBits();
5712 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5713
5714 auto &MF = *MI.getMF();
5715 AttributeList Attr = MF.getFunction().getAttributes();
5716 const auto &TLI = getTargetLowering();
5717 LLVMContext &Ctx = MF.getFunction().getContext();
5718 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5719 return false;
5720
5721 // Don't do this for minsize because the instruction sequence is usually
5722 // larger.
5723 if (MF.getFunction().hasMinSize())
5724 return false;
5725
5726 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5727 if (Opcode == TargetOpcode::G_SDIV &&
5728 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5729 return matchUnaryPredicate(
5730 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5731 }
5732
5733 auto *RHSDef = MRI.getVRegDef(RHS);
5734 if (!isConstantOrConstantVector(*RHSDef, MRI))
5735 return false;
5736
5737 // Don't do this if the types are not going to be legal.
5738 if (LI) {
5739 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5740 return false;
5741 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5742 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5743 return false;
5744 if (Opcode == TargetOpcode::G_SREM &&
5745 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5746 return false;
5747 }
5748
5749 return matchUnaryPredicate(
5750 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5751}
5752
5753void CombinerHelper::applySDivOrSRemByConst(MachineInstr &MI) const {
5754 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5755 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5756}
5757
5758MachineInstr *CombinerHelper::buildSDivOrSRemUsingMul(MachineInstr &MI) const {
5759 unsigned Opcode = MI.getOpcode();
5760 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5761 Opcode == TargetOpcode::G_SREM);
5762 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5763 Register Dst = SDivorRem.getReg(0);
5764 Register LHS = SDivorRem.getReg(1);
5765 Register RHS = SDivorRem.getReg(2);
5766 LLT Ty = MRI.getType(Dst);
5767 LLT ScalarTy = Ty.getScalarType();
5768 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5769 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5770 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5771 auto &MIB = Builder;
5772
5773 bool UseSRA = false;
5774 SmallVector<Register, 16> ExactShifts, ExactFactors;
5775
5776 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5777 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5778
5779 auto BuildExactSDIVPattern = [&](const Constant *C) {
5780 // Don't recompute inverses for each splat element.
5781 if (IsSplat && !ExactFactors.empty()) {
5782 ExactShifts.push_back(ExactShifts[0]);
5783 ExactFactors.push_back(ExactFactors[0]);
5784 return true;
5785 }
5786
5787 auto *CI = cast<ConstantInt>(C);
5788 APInt Divisor = CI->getValue();
5789 unsigned Shift = Divisor.countr_zero();
5790 if (Shift) {
5791 Divisor.ashrInPlace(Shift);
5792 UseSRA = true;
5793 }
5794
5795 // Calculate the multiplicative inverse modulo BW.
5796 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5797 APInt Factor = Divisor.multiplicativeInverse();
5798 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5799 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5800 return true;
5801 };
5802
5803 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5804 // Collect all magic values from the build vector.
5805 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5806 (void)Matched;
5807 assert(Matched && "Expected unary predicate match to succeed");
5808
5809 Register Shift, Factor;
5810 if (Ty.isVector()) {
5811 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5812 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5813 } else {
5814 Shift = ExactShifts[0];
5815 Factor = ExactFactors[0];
5816 }
5817
5818 Register Res = LHS;
5819
5820 if (UseSRA)
5821 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5822
5823 return MIB.buildMul(Ty, Res, Factor);
5824 }
5825
5826 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5827
5828 auto BuildSDIVPattern = [&](const Constant *C) {
5829 auto *CI = cast<ConstantInt>(C);
5830 const APInt &Divisor = CI->getValue();
5831
5831
5832 SignedDivisionByConstantInfo Magics =
5833 SignedDivisionByConstantInfo::get(Divisor);
5834 int NumeratorFactor = 0;
5835 int ShiftMask = -1;
5836
5837 if (Divisor.isOne() || Divisor.isAllOnes()) {
5838 // If d is +1/-1, we just multiply the numerator by +1/-1.
5839 NumeratorFactor = Divisor.getSExtValue();
5840 Magics.Magic = 0;
5841 Magics.ShiftAmount = 0;
5842 ShiftMask = 0;
5843 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
5844 // If d > 0 and m < 0, add the numerator.
5845 NumeratorFactor = 1;
5846 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
5847 // If d < 0 and m > 0, subtract the numerator.
5848 NumeratorFactor = -1;
5849 }
5850
5851 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
5852 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
5853 Shifts.push_back(
5854 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
5855 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
5856
5857 return true;
5858 };
5859
5860 // Collect the shifts/magic values from each element.
5861 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5862 (void)Matched;
5863 assert(Matched && "Expected unary predicate match to succeed");
5864
5865 Register MagicFactor, Factor, Shift, ShiftMask;
5866 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5867 if (RHSDef) {
5868 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5869 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5870 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5871 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
5872 } else {
5873 assert(MRI.getType(RHS).isScalar() &&
5874 "Non-build_vector operation should have been a scalar");
5875 MagicFactor = MagicFactors[0];
5876 Factor = Factors[0];
5877 Shift = Shifts[0];
5878 ShiftMask = ShiftMasks[0];
5879 }
5880
5881 Register Q = LHS;
5882 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
5883
5884 // (Optionally) Add/subtract the numerator using Factor.
5885 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
5886 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
5887
5888 // Shift right algebraic by shift value.
5889 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
5890
5891 // Extract the sign bit, mask it and add it to the quotient.
5892 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
5893 auto T = MIB.buildLShr(Ty, Q, SignShift);
5894 T = MIB.buildAnd(Ty, T, ShiftMask);
5895 auto ret = MIB.buildAdd(Ty, Q, T);
5896
5897 if (Opcode == TargetOpcode::G_SREM) {
5898 auto Prod = MIB.buildMul(Ty, ret, RHS);
5899 return MIB.buildSub(Ty, LHS, Prod);
5900 }
5901 return ret;
5902}
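// Editor's note: standalone sketch (not part of the original file) of the
// remainder step shared by both expansions above: once the quotient exists,
// the remainder is LHS - Quotient * RHS, matching C++'s truncating division
// and remainder pair (the INT_MIN / -1 case is excluded since it is UB there).
#include <cassert>
#include <cstdint>

static void remFromDivSketch(int32_t LHS, int32_t RHS) {
  assert(RHS != 0 && !(LHS == INT32_MIN && RHS == -1));
  int32_t Quotient = LHS / RHS; // stands in for the expanded division
  int32_t Remainder = LHS - Quotient * RHS;
  assert(Remainder == LHS % RHS);
}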
5903
5904bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) const {
5905 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5906 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5907 "Expected SDIV or UDIV");
5908 auto &Div = cast<GenericMachineInstr>(MI);
5909 Register RHS = Div.getReg(2);
5910 auto MatchPow2 = [&](const Constant *C) {
5911 auto *CI = dyn_cast<ConstantInt>(C);
5912 return CI && (CI->getValue().isPowerOf2() ||
5913 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5914 };
5915 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5916}
5917
5918void CombinerHelper::applySDivByPow2(MachineInstr &MI) const {
5919 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5920 auto &SDiv = cast<GenericMachineInstr>(MI);
5921 Register Dst = SDiv.getReg(0);
5922 Register LHS = SDiv.getReg(1);
5923 Register RHS = SDiv.getReg(2);
5924 LLT Ty = MRI.getType(Dst);
5925 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5926 LLT CCVT =
5927 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5928
5929 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5930 // to the following version:
5931 //
5932 // %c1 = G_CTTZ %rhs
5933 // %inexact = G_SUB $bitwidth, %c1
5934 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5935 // %lshr = G_LSHR %sign, %inexact
5936 // %add = G_ADD %lhs, %lshr
5937 // %ashr = G_ASHR %add, %c1
5938 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5939 // %zero = G_CONSTANT $0
5940 // %neg = G_NEG %ashr
5941 // %isneg = G_ICMP SLT %rhs, %zero
5942 // %res = G_SELECT %isneg, %neg, %ashr
5943
5944 unsigned BitWidth = Ty.getScalarSizeInBits();
5945 auto Zero = Builder.buildConstant(Ty, 0);
5946
5947 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5948 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5949 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5950 // Splat the sign bit into the register
5951 auto Sign = Builder.buildAShr(
5952 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5953
5954 // Add (LHS < 0) ? |divisor| - 1 : 0 so the arithmetic shift rounds toward zero.
5955 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5956 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5957 auto AShr = Builder.buildAShr(Ty, Add, C1);
5958
5959 // Special case: (sdiv X, 1) -> X
5960 // Special Case: (sdiv X, -1) -> 0-X
5961 auto One = Builder.buildConstant(Ty, 1);
5962 auto MinusOne = Builder.buildConstant(Ty, -1);
5963 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5964 auto IsMinusOne =
5965 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5966 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5967 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5968
5969 // If divided by a positive value, we're done. Otherwise, the result must be
5970 // negated.
5971 auto Neg = Builder.buildNeg(Ty, AShr);
5972 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5973 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5974 MI.eraseFromParent();
5975}
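// Editor's note: standalone sketch (not part of the original file) of the
// rounding fix used above for signed division by a power of two 2^K: negative
// dividends receive a bias of 2^K - 1 before the arithmetic shift so the
// result rounds toward zero, like C++ signed division.
#include <cassert>
#include <cstdint>

static void sdivByPow2Sketch(int32_t LHS) {
  const unsigned K = 3;     // divisor is 8
  int32_t Sign = LHS >> 31; // splat of the sign bit: all ones iff LHS < 0
  int32_t Bias = static_cast<int32_t>(static_cast<uint32_t>(Sign) >> (32 - K));
  int32_t Quotient = (LHS + Bias) >> K; // Bias is 2^K - 1 only when LHS < 0
  assert(Quotient == LHS / 8);
}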
5976
5977void CombinerHelper::applyUDivByPow2(MachineInstr &MI) const {
5978 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5979 auto &UDiv = cast<GenericMachineInstr>(MI);
5980 Register Dst = UDiv.getReg(0);
5981 Register LHS = UDiv.getReg(1);
5982 Register RHS = UDiv.getReg(2);
5983 LLT Ty = MRI.getType(Dst);
5984 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5985
5986 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5987 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5988 MI.eraseFromParent();
5989}
5990
5991bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) const {
5992 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5993 Register RHS = MI.getOperand(2).getReg();
5994 Register Dst = MI.getOperand(0).getReg();
5995 LLT Ty = MRI.getType(Dst);
5996 LLT RHSTy = MRI.getType(RHS);
5997 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5998 auto MatchPow2ExceptOne = [&](const Constant *C) {
5999 if (auto *CI = dyn_cast<ConstantInt>(C))
6000 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
6001 return false;
6002 };
6003 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
6004 return false;
6005 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
6006 // get log base 2, and it is not always legal on a target.
6007 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
6008 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
6009}
6010
6011void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) const {
6012 Register LHS = MI.getOperand(1).getReg();
6013 Register RHS = MI.getOperand(2).getReg();
6014 Register Dst = MI.getOperand(0).getReg();
6015 LLT Ty = MRI.getType(Dst);
6016 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
6017 unsigned NumEltBits = Ty.getScalarSizeInBits();
6018
6019 auto LogBase2 = buildLogBase2(RHS, Builder);
6020 auto ShiftAmt =
6021 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
6022 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
6023 Builder.buildLShr(Dst, LHS, Trunc);
6024 MI.eraseFromParent();
6025}
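// Editor's note: standalone sketch (not part of the original file): the high
// half of x * 2^K is x >> (BitWidth - K), which is exactly the shift the
// combine above emits instead of a G_UMULH.
#include <cassert>
#include <cstdint>

static void umulhByPow2Sketch(uint32_t X) {
  const unsigned K = 20; // constant operand is 1u << 20
  uint32_t High =
      static_cast<uint32_t>((static_cast<uint64_t>(X) * (1u << K)) >> 32);
  assert(High == X >> (32 - K));
}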
6026
6027bool CombinerHelper::matchTruncSSatS(MachineInstr &MI,
6028 Register &MatchInfo) const {
6029 Register Dst = MI.getOperand(0).getReg();
6030 Register Src = MI.getOperand(1).getReg();
6031 LLT DstTy = MRI.getType(Dst);
6032 LLT SrcTy = MRI.getType(Src);
6033 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6034 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6035 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6036
6037 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6038 return false;
6039
6040 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6041 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
6042 return mi_match(Src, MRI,
6043 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6044 m_SpecificICstOrSplat(SignedMin)),
6045 m_SpecificICstOrSplat(SignedMax))) ||
6046 mi_match(Src, MRI,
6047 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6048 m_SpecificICstOrSplat(SignedMax)),
6049 m_SpecificICstOrSplat(SignedMin)));
6050}
6051
6052void CombinerHelper::applyTruncSSatS(MachineInstr &MI,
6053 Register &MatchInfo) const {
6054 Register Dst = MI.getOperand(0).getReg();
6055 Builder.buildTruncSSatS(Dst, MatchInfo);
6056 MI.eraseFromParent();
6057}
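// Editor's note: standalone sketch (not part of the original file): clamping a
// wide signed value to the narrow type's range before truncating, as matched
// above, is precisely a signed saturating truncate.
#include <algorithm>
#include <cassert>
#include <cstdint>

static int8_t truncSSatSSketch(int32_t Src) {
  int32_t Clamped = std::clamp<int32_t>(Src, INT8_MIN, INT8_MAX); // smax + smin
  return static_cast<int8_t>(Clamped); // the truncate can no longer wrap
}
// For example, truncSSatSSketch(1000) yields 127 and truncSSatSSketch(-1000)
// yields -128.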
6058
6059bool CombinerHelper::matchTruncSSatU(MachineInstr &MI,
6060 Register &MatchInfo) const {
6061 Register Dst = MI.getOperand(0).getReg();
6062 Register Src = MI.getOperand(1).getReg();
6063 LLT DstTy = MRI.getType(Dst);
6064 LLT SrcTy = MRI.getType(Src);
6065 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6066 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6067 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6068
6069 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6070 return false;
6071 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6072 return mi_match(Src, MRI,
6074 m_SpecificICstOrSplat(UnsignedMax))) ||
6075 mi_match(Src, MRI,
6076 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6077 m_SpecificICstOrSplat(UnsignedMax)),
6078 m_SpecificICstOrSplat(0))) ||
6079 mi_match(Src, MRI,
6081 m_SpecificICstOrSplat(UnsignedMax)));
6082}
6083
6084void CombinerHelper::applyTruncSSatU(MachineInstr &MI,
6085 Register &MatchInfo) const {
6086 Register Dst = MI.getOperand(0).getReg();
6087 Builder.buildTruncSSatU(Dst, MatchInfo);
6088 MI.eraseFromParent();
6089}
6090
6092 MachineInstr &MinMI) const {
6093 Register Min = MinMI.getOperand(2).getReg();
6094 Register Val = MinMI.getOperand(1).getReg();
6095 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6096 LLT SrcTy = MRI.getType(Val);
6097 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6098 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6099 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6100
6101 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6102 return false;
6103 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6104 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6105 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6106}
6107
6109 MachineInstr &SrcMI) const {
6110 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6111 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6112
6113 return LI &&
6114 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6115}
6116
6117bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
6118 BuildFnTy &MatchInfo) const {
6119 unsigned Opc = MI.getOpcode();
6120 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6121 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6122 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6123
6124 Register Dst = MI.getOperand(0).getReg();
6125 Register X = MI.getOperand(1).getReg();
6126 Register Y = MI.getOperand(2).getReg();
6127 LLT Type = MRI.getType(Dst);
6128
6129 // fold (fadd x, fneg(y)) -> (fsub x, y)
6130 // fold (fadd fneg(y), x) -> (fsub x, y)
6131 // G_FADD is commutative, so both cases are checked by m_GFAdd.
6132 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6133 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6134 Opc = TargetOpcode::G_FSUB;
6135 }
6136 /// fold (fsub x, fneg(y)) -> (fadd x, y)
6137 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6138 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6139 Opc = TargetOpcode::G_FADD;
6140 }
6141 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6142 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6143 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6144 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
6145 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6146 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6147 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6148 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6149 // no opcode change
6150 } else
6151 return false;
6152
6153 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6154 Observer.changingInstr(MI);
6155 MI.setDesc(B.getTII().get(Opc));
6156 MI.getOperand(1).setReg(X);
6157 MI.getOperand(2).setReg(Y);
6158 Observer.changedInstr(MI);
6159 };
6160 return true;
6161}
6162
6163bool CombinerHelper::matchFsubToFneg(MachineInstr &MI,
6164 Register &MatchInfo) const {
6165 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6166
6167 Register LHS = MI.getOperand(1).getReg();
6168 MatchInfo = MI.getOperand(2).getReg();
6169 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6170
6171 const auto LHSCst = Ty.isVector()
6172 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6173 : getFConstantVRegValWithLookThrough(LHS, MRI);
6174 if (!LHSCst)
6175 return false;
6176
6177 // -0.0 is always allowed
6178 if (LHSCst->Value.isNegZero())
6179 return true;
6180
6181 // +0.0 is only allowed if nsz is set.
6182 if (LHSCst->Value.isPosZero())
6183 return MI.getFlag(MachineInstr::FmNsz);
6184
6185 return false;
6186}
6187
6188void CombinerHelper::applyFsubToFneg(MachineInstr &MI,
6189 Register &MatchInfo) const {
6190 Register Dst = MI.getOperand(0).getReg();
6191 Builder.buildFNeg(
6192 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6193 eraseInst(MI);
6194}
6195
6196/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6197/// due to global flags or MachineInstr flags.
6198static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6199 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6200 return false;
6201 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6202}
6203
6204static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6205 const MachineRegisterInfo &MRI) {
6206 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6207 MRI.use_instr_nodbg_end()) >
6208 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6209 MRI.use_instr_nodbg_end());
6210}
6211
6212bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
6213 bool &AllowFusionGlobally,
6214 bool &HasFMAD, bool &Aggressive,
6215 bool CanReassociate) const {
6216
6217 auto *MF = MI.getMF();
6218 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6219 const TargetOptions &Options = MF->getTarget().Options;
6220 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6221
6222 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6223 return false;
6224
6225 // Floating-point multiply-add with intermediate rounding.
6226 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6227 // Floating-point multiply-add without intermediate rounding.
6228 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6229 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6230 // No valid opcode, do not combine.
6231 if (!HasFMAD && !HasFMA)
6232 return false;
6233
6234 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6235 // If the addition is not contractable, do not combine.
6236 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6237 return false;
6238
6239 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6240 return true;
6241}
6242
6243bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
6244 MachineInstr &MI,
6245 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6246 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6247
6248 bool AllowFusionGlobally, HasFMAD, Aggressive;
6249 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6250 return false;
6251
6252 Register Op1 = MI.getOperand(1).getReg();
6253 Register Op2 = MI.getOperand(2).getReg();
6254 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6255 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6256 unsigned PreferredFusedOpcode =
6257 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6258
6259 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6260 // prefer to fold the multiply with fewer uses.
6261 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6262 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6263 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6264 std::swap(LHS, RHS);
6265 }
6266
6267 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6268 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6269 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6270 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6271 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6272 {LHS.MI->getOperand(1).getReg(),
6273 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6274 };
6275 return true;
6276 }
6277
6278 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6279 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6280 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6281 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6282 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6283 {RHS.MI->getOperand(1).getReg(),
6284 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6285 };
6286 return true;
6287 }
6288
6289 return false;
6290}
6291
6292bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
6293 MachineInstr &MI,
6294 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6295 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6296
6297 bool AllowFusionGlobally, HasFMAD, Aggressive;
6298 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6299 return false;
6300
6301 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6302 Register Op1 = MI.getOperand(1).getReg();
6303 Register Op2 = MI.getOperand(2).getReg();
6304 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6305 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6306 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6307
6308 unsigned PreferredFusedOpcode =
6309 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6310
6311 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6312 // prefer to fold the multiply with fewer uses.
6313 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6314 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6315 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6316 std::swap(LHS, RHS);
6317 }
6318
6319 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
6320 MachineInstr *FpExtSrc;
6321 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6322 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6323 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6324 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6325 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6326 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6327 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6328 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6329 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6330 };
6331 return true;
6332 }
6333
6334 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6335 // Note: Commutes FADD operands.
6336 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6337 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6338 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6339 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6340 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6341 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6342 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6343 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6344 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6345 };
6346 return true;
6347 }
6348
6349 return false;
6350}
6351
6352bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
6353 MachineInstr &MI,
6354 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6355 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6356
6357 bool AllowFusionGlobally, HasFMAD, Aggressive;
6358 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6359 return false;
6360
6361 Register Op1 = MI.getOperand(1).getReg();
6362 Register Op2 = MI.getOperand(2).getReg();
6363 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6364 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6365 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6366
6367 unsigned PreferredFusedOpcode =
6368 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6369
6370 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6371 // prefer to fold the multiply with fewer uses.
6372 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6373 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6374 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6375 std::swap(LHS, RHS);
6376 }
6377
6378 MachineInstr *FMA = nullptr;
6379 Register Z;
6380 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6381 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6382 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6383 TargetOpcode::G_FMUL) &&
6384 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6385 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6386 FMA = LHS.MI;
6387 Z = RHS.Reg;
6388 }
6389 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6390 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6391 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6392 TargetOpcode::G_FMUL) &&
6393 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6394 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6395 Z = LHS.Reg;
6396 FMA = RHS.MI;
6397 }
6398
6399 if (FMA) {
6400 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6401 Register X = FMA->getOperand(1).getReg();
6402 Register Y = FMA->getOperand(2).getReg();
6403 Register U = FMulMI->getOperand(1).getReg();
6404 Register V = FMulMI->getOperand(2).getReg();
6405
6406 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6407 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6408 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6409 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6410 {X, Y, InnerFMA});
6411 };
6412 return true;
6413 }
6414
6415 return false;
6416}
6417
6418bool CombinerHelper::matchCombineFAddFpExtFMAFMulToFMadOrFMAAggressive(
6419 MachineInstr &MI,
6420 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6421 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6422
6423 bool AllowFusionGlobally, HasFMAD, Aggressive;
6424 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6425 return false;
6426
6427 if (!Aggressive)
6428 return false;
6429
6430 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6431 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6432 Register Op1 = MI.getOperand(1).getReg();
6433 Register Op2 = MI.getOperand(2).getReg();
6434 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6435 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6436
6437 unsigned PreferredFusedOpcode =
6438 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6439
6440 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6441 // prefer to fold the multiply with fewer uses.
6442 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6443 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6444 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6445 std::swap(LHS, RHS);
6446 }
6447
6448 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
6449 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6450 Register Y, MachineIRBuilder &B) {
6451 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6452 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6453 Register InnerFMA =
6454 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6455 .getReg(0);
6456 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6457 {X, Y, InnerFMA});
6458 };
6459
6460 MachineInstr *FMulMI, *FMAMI;
6461 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6462 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6463 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6464 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6465 m_GFPExt(m_MInstr(FMulMI))) &&
6466 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6467 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6468 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6469 MatchInfo = [=](MachineIRBuilder &B) {
6470 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6471 FMulMI->getOperand(2).getReg(), RHS.Reg,
6472 LHS.MI->getOperand(1).getReg(),
6473 LHS.MI->getOperand(2).getReg(), B);
6474 };
6475 return true;
6476 }
6477
6478 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6479 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6480 // FIXME: This turns two single-precision and one double-precision
6481 // operation into two double-precision operations, which might not be
6482 // interesting for all targets, especially GPUs.
6483 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6484 FMAMI->getOpcode() == PreferredFusedOpcode) {
6485 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6486 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6487 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6488 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6489 MatchInfo = [=](MachineIRBuilder &B) {
6490 Register X = FMAMI->getOperand(1).getReg();
6491 Register Y = FMAMI->getOperand(2).getReg();
6492 X = B.buildFPExt(DstType, X).getReg(0);
6493 Y = B.buildFPExt(DstType, Y).getReg(0);
6494 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6495 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6496 };
6497
6498 return true;
6499 }
6500 }
6501
6502 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
6503 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6504 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6505 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6506 m_GFPExt(m_MInstr(FMulMI))) &&
6507 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6508 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6509 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6510 MatchInfo = [=](MachineIRBuilder &B) {
6511 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6512 FMulMI->getOperand(2).getReg(), LHS.Reg,
6513 RHS.MI->getOperand(1).getReg(),
6514 RHS.MI->getOperand(2).getReg(), B);
6515 };
6516 return true;
6517 }
6518
6519 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
6520 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6521 // FIXME: This turns two single-precision and one double-precision
6522 // operation into two double-precision operations, which might not be
6523 // interesting for all targets, especially GPUs.
6524 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6525 FMAMI->getOpcode() == PreferredFusedOpcode) {
6526 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6527 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6528 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6529 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6530 MatchInfo = [=](MachineIRBuilder &B) {
6531 Register X = FMAMI->getOperand(1).getReg();
6532 Register Y = FMAMI->getOperand(2).getReg();
6533 X = B.buildFPExt(DstType, X).getReg(0);
6534 Y = B.buildFPExt(DstType, Y).getReg(0);
6535 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6536 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6537 };
6538 return true;
6539 }
6540 }
6541
6542 return false;
6543}
6544
6545bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
6546 MachineInstr &MI,
6547 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6548 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6549
6550 bool AllowFusionGlobally, HasFMAD, Aggressive;
6551 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6552 return false;
6553
6554 Register Op1 = MI.getOperand(1).getReg();
6555 Register Op2 = MI.getOperand(2).getReg();
6556 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6557 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6558 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6559
6560 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
6561 // prefer to fold the multiply with fewer uses.
6562 bool FirstMulHasFewerUses = true;
6563 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6564 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6565 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6566 FirstMulHasFewerUses = false;
6567
6568 unsigned PreferredFusedOpcode =
6569 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6570
6571 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6572 if (FirstMulHasFewerUses &&
6573 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6574 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6575 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6576 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6577 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6578 {LHS.MI->getOperand(1).getReg(),
6579 LHS.MI->getOperand(2).getReg(), NegZ});
6580 };
6581 return true;
6582 }
6583 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6584 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6585 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6586 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6587 Register NegY =
6588 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6589 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6590 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6591 };
6592 return true;
6593 }
6594
6595 return false;
6596}
6597
6598bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
6599 MachineInstr &MI,
6600 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6601 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6602
6603 bool AllowFusionGlobally, HasFMAD, Aggressive;
6604 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6605 return false;
6606
6607 Register LHSReg = MI.getOperand(1).getReg();
6608 Register RHSReg = MI.getOperand(2).getReg();
6609 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6610
6611 unsigned PreferredFusedOpcode =
6612 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6613
6614 MachineInstr *FMulMI;
6615 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6616 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6617 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6618 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6619 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6620 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6621 Register NegX =
6622 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6623 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6624 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6625 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6626 };
6627 return true;
6628 }
6629
6630 // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
6631 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6632 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6633 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6634 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6635 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6636 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6637 {FMulMI->getOperand(1).getReg(),
6638 FMulMI->getOperand(2).getReg(), LHSReg});
6639 };
6640 return true;
6641 }
6642
6643 return false;
6644}
6645
6648 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6649 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6650
6651 bool AllowFusionGlobally, HasFMAD, Aggressive;
6652 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6653 return false;
6654
6655 Register LHSReg = MI.getOperand(1).getReg();
6656 Register RHSReg = MI.getOperand(2).getReg();
6657 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6658
6659 unsigned PreferredFusedOpcode =
6660 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6661
6662 MachineInstr *FMulMI;
6663 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6664 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6665 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6666 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6667 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6668 Register FpExtX =
6669 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6670 Register FpExtY =
6671 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6672 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6673 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6674 {FpExtX, FpExtY, NegZ});
6675 };
6676 return true;
6677 }
6678
6679 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6680 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6681 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6682 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6683 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6684 Register FpExtY =
6685 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6686 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6687 Register FpExtZ =
6688 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6689 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6690 {NegY, FpExtZ, LHSReg});
6691 };
6692 return true;
6693 }
6694
6695 return false;
6696}
6697
6700 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6701 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6702
6703 bool AllowFusionGlobally, HasFMAD, Aggressive;
6704 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6705 return false;
6706
6707 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6708 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6709 Register LHSReg = MI.getOperand(1).getReg();
6710 Register RHSReg = MI.getOperand(2).getReg();
6711
6712 unsigned PreferredFusedOpcode =
6713 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6714
6715 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6717 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6718 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6719 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6720 };
6721
6722 MachineInstr *FMulMI;
6723 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6724 // (fneg (fma (fpext x), (fpext y), z))
6725 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6726 // (fneg (fma (fpext x), (fpext y), z))
6727 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6728 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6729 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6730 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6731 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6732 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6733 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6734 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6735 FMulMI->getOperand(2).getReg(), RHSReg, B);
6736 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6737 };
6738 return true;
6739 }
6740
6741 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6742 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6743 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6744 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6745 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6746 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6747 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6748 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6749 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6750 FMulMI->getOperand(2).getReg(), LHSReg, B);
6751 };
6752 return true;
6753 }
6754
6755 return false;
6756}
6757
6759 unsigned &IdxToPropagate) const {
6760 bool PropagateNaN;
6761 switch (MI.getOpcode()) {
6762 default:
6763 return false;
6764 case TargetOpcode::G_FMINNUM:
6765 case TargetOpcode::G_FMAXNUM:
6766 PropagateNaN = false;
6767 break;
6768 case TargetOpcode::G_FMINIMUM:
6769 case TargetOpcode::G_FMAXIMUM:
6770 PropagateNaN = true;
6771 break;
6772 }
6773
6774 auto MatchNaN = [&](unsigned Idx) {
6775 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6776 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6777 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6778 return false;
6779 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6780 return true;
6781 };
6782
6783 return MatchNaN(1) || MatchNaN(2);
6784}
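// Illustrative example for the matcher above (hypothetical registers): with
// %nan defined as a NaN G_FCONSTANT,
//   %r:_(s32) = G_FMINNUM %x, %nan   matches with IdxToPropagate = 1 (keep %x)
//   %r:_(s32) = G_FMINIMUM %x, %nan  matches with IdxToPropagate = 2 (keep %nan)
// because the *NUM variants return the non-NaN operand while the *IMUM
// variants propagate the NaN.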
6785
6786// Combine multiple FDIVs with the same divisor into multiple FMULs by the
6787// reciprocal.
6788// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
6790 MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
6791 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
6792
6793 Register X = MI.getOperand(1).getReg();
6794 Register Y = MI.getOperand(2).getReg();
6795
6796 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
6797 return false;
6798
6799 // Skip if the current instruction is already a reciprocal or a negated reciprocal.
6800 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
6801 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
6802 return false;
6803
6804 // Exit early if the target does not want this transform or if there can't
6805 // possibly be enough uses of the divisor to make the transform worthwhile.
6806 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
6807 if (!MinUses)
6808 return false;
6809
6810 // Find all FDIV users of the same divisor. For the moment we limit all
6811 // instructions to a single BB and use the first Instr in MatchInfo as the
6812 // dominating position.
6813 MatchInfo.push_back(&MI);
6814 for (auto &U : MRI.use_nodbg_instructions(Y)) {
6815 if (&U == &MI || U.getParent() != MI.getParent())
6816 continue;
6817 if (U.getOpcode() == TargetOpcode::G_FDIV &&
6818 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y) {
6819 // This division is eligible for the optimization only if it allows
6820 // reciprocal formation, i.e. it carries the 'arcp' fast-math flag.
6821 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
6822 MatchInfo.push_back(&U);
6823 if (dominates(U, *MatchInfo[0]))
6824 std::swap(MatchInfo[0], MatchInfo.back());
6825 }
6826 }
6827 }
6828
6829 // Now that we have the actual number of divisor uses, make sure it meets
6830 // the minimum threshold specified by the target.
6831 return MatchInfo.size() >= MinUses;
6832}
6833
6835 SmallVector<MachineInstr *> &MatchInfo) const {
6836 // Generate the new fdiv at the position of the first instruction, which we
6837 // have ensured dominates all of the other instructions.
6838 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
6839 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
6840 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
6841 MatchInfo[0]->getOperand(2).getReg(),
6842 MatchInfo[0]->getFlags());
6843
6844 // Replace all found div's with fmul instructions.
6845 for (MachineInstr *MI : MatchInfo) {
6846 Builder.setInsertPt(*MI->getParent(), MI);
6847 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
6848 Div->getOperand(0).getReg(), MI->getFlags());
6849 MI->eraseFromParent();
6850 }
6851}
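// Illustrative before/after for the repeated-divisor combine (hypothetical
// registers; every G_FDIV carries the 'arcp' flag):
//
//   %q0:_(s32) = arcp G_FDIV %a, %y
//   %q1:_(s32) = arcp G_FDIV %b, %y
// becomes
//   %one:_(s32) = G_FCONSTANT float 1.0
//   %rcp:_(s32) = arcp G_FDIV %one, %y
//   %q0:_(s32) = arcp G_FMUL %a, %rcp
//   %q1:_(s32) = arcp G_FMUL %b, %rcp
// trading N divisions for one division plus N multiplies once the target's
// combineRepeatedFPDivisors() threshold is met.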
6852
6854 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6855 Register LHS = MI.getOperand(1).getReg();
6856 Register RHS = MI.getOperand(2).getReg();
6857
6858 // Helper lambda to check for opportunities for
6859 // A + (B - A) -> B
6860 // (B - A) + A -> B
6861 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6862 Register Reg;
6863 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6864 Reg == MaybeSameReg;
6865 };
6866 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6867}
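// Illustrative example (hypothetical registers): for
//   %t:_(s64) = G_SUB %b, %a
//   %d:_(s64) = G_ADD %a, %t
// the match succeeds with Src = %b, and the apply step replaces %d with %b.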
6868
6870 Register &MatchInfo) const {
6871 // This combine folds the following patterns:
6872 //
6873 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6874 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6875 // into
6876 // x
6877 // if
6878 // k == sizeof(VecEltTy)/2
6879 // type(x) == type(dst)
6880 //
6881 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6882 // into
6883 // x
6884 // if
6885 // type(x) == type(dst)
6886
6887 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6888 LLT DstEltTy = DstVecTy.getElementType();
6889
6890 Register Lo, Hi;
6891
6892 if (mi_match(
6893 MI, MRI,
6895 MatchInfo = Lo;
6896 return MRI.getType(MatchInfo) == DstVecTy;
6897 }
6898
6899 std::optional<ValueAndVReg> ShiftAmount;
6900 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6901 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6902 if (mi_match(
6903 MI, MRI,
6904 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6905 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6906 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6907 MatchInfo = Lo;
6908 return MRI.getType(MatchInfo) == DstVecTy;
6909 }
6910 }
6911
6912 return false;
6913}
6914
6916 Register &MatchInfo) const {
6917 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6918 // if type(x) == type(G_TRUNC)
6919 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6920 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6921 return false;
6922
6923 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6924}
6925
6927 Register &MatchInfo) const {
6928 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6929 // y if K == size of vector element type
6930 std::optional<ValueAndVReg> ShiftAmt;
6931 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6933 m_GCst(ShiftAmt))))
6934 return false;
6935
6936 LLT MatchTy = MRI.getType(MatchInfo);
6937 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6938 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6939}
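// Illustrative example for the matcher above (hypothetical registers, assuming
// the usual little-endian layout of the bitcast):
//   %bv:_(<2 x s32>) = G_BUILD_VECTOR %x:_(s32), %y:_(s32)
//   %cast:_(s64) = G_BITCAST %bv
//   %shr:_(s64) = G_LSHR %cast, 32
//   %t:_(s32) = G_TRUNC %shr
// folds to %y, since the shift amount equals the element width.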
6940
6941unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6942 CmpInst::Predicate Pred, LLT DstTy,
6943 SelectPatternNaNBehaviour VsNaNRetVal) const {
6944 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6945 "Expected a NaN behaviour?");
6946 // Choose an opcode based off of legality or the behaviour when one of the
6947 // LHS/RHS may be NaN.
6948 switch (Pred) {
6949 default:
6950 return 0;
6951 case CmpInst::FCMP_UGT:
6952 case CmpInst::FCMP_UGE:
6953 case CmpInst::FCMP_OGT:
6954 case CmpInst::FCMP_OGE:
6955 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6956 return TargetOpcode::G_FMAXNUM;
6957 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6958 return TargetOpcode::G_FMAXIMUM;
6959 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6960 return TargetOpcode::G_FMAXNUM;
6961 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6962 return TargetOpcode::G_FMAXIMUM;
6963 return 0;
6964 case CmpInst::FCMP_ULT:
6965 case CmpInst::FCMP_ULE:
6966 case CmpInst::FCMP_OLT:
6967 case CmpInst::FCMP_OLE:
6968 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6969 return TargetOpcode::G_FMINNUM;
6970 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6971 return TargetOpcode::G_FMINIMUM;
6972 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6973 return TargetOpcode::G_FMINNUM;
6974 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6975 return 0;
6976 return TargetOpcode::G_FMINIMUM;
6977 }
6978}
6979
6980CombinerHelper::SelectPatternNaNBehaviour
6981CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6982 bool IsOrderedComparison) const {
6983 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6984 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6985 // Completely unsafe.
6986 if (!LHSSafe && !RHSSafe)
6987 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6988 if (LHSSafe && RHSSafe)
6989 return SelectPatternNaNBehaviour::RETURNS_ANY;
6990 // An ordered comparison will return false when given a NaN, so it
6991 // returns the RHS.
6992 if (IsOrderedComparison)
6993 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6994 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6995 // An unordered comparison will return true when given a NaN, so it
6996 // returns the LHS.
6997 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6998 : SelectPatternNaNBehaviour::RETURNS_NAN;
6999}
7000
7001bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
7002 Register TrueVal, Register FalseVal,
7003 BuildFnTy &MatchInfo) const {
7004 // Match: select (fcmp cond x, y) x, y
7005 // select (fcmp cond x, y) y, x
7006 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
7007 LLT DstTy = MRI.getType(Dst);
7008 // Bail out early on pointers, since we'll never want to fold to a min/max.
7009 if (DstTy.isPointer())
7010 return false;
7011 // Match a floating point compare with a less-than/greater-than predicate.
7012 // TODO: Allow multiple users of the compare if they are all selects.
7013 CmpInst::Predicate Pred;
7014 Register CmpLHS, CmpRHS;
7015 if (!mi_match(Cond, MRI,
7017 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
7018 CmpInst::isEquality(Pred))
7019 return false;
7020 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
7021 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
7022 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
7023 return false;
7024 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
7025 std::swap(CmpLHS, CmpRHS);
7026 Pred = CmpInst::getSwappedPredicate(Pred);
7027 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
7028 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
7029 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
7030 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
7031 }
7032 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
7033 return false;
7034 // Decide what type of max/min this should be based off of the predicate.
7035 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
7036 if (!Opc || !isLegal({Opc, {DstTy}}))
7037 return false;
7038 // Comparisons between signed zero and zero may have different results...
7039 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
7040 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
7041 // We don't know if a comparison between two 0s will give us a consistent
7042 // result. Be conservative and only proceed if at least one side is
7043 // non-zero.
7044 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
7045 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
7046 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
7047 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
7048 return false;
7049 }
7050 }
7051 MatchInfo = [=](MachineIRBuilder &B) {
7052 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
7053 };
7054 return true;
7055}
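// Illustrative example (hypothetical registers):
//   %c:_(s1) = G_FCMP floatpred(ogt), %x, %y
//   %r:_(s32) = G_SELECT %c, %x, %y
// becomes, assuming the NaN and signed-zero checks above succeed and the
// chosen opcode is legal,
//   %r:_(s32) = G_FMAXNUM %x, %y
// (or G_FMAXIMUM, depending on which operand may be NaN).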
7056
7058 BuildFnTy &MatchInfo) const {
7059 // TODO: Handle integer cases.
7060 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
7061 // Condition may be fed by a truncated compare.
7062 Register Cond = MI.getOperand(1).getReg();
7063 Register MaybeTrunc;
7064 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
7065 Cond = MaybeTrunc;
7066 Register Dst = MI.getOperand(0).getReg();
7067 Register TrueVal = MI.getOperand(2).getReg();
7068 Register FalseVal = MI.getOperand(3).getReg();
7069 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
7070}
7071
7073 BuildFnTy &MatchInfo) const {
7074 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
7075 // (X + Y) == X --> Y == 0
7076 // (X + Y) != X --> Y != 0
7077 // (X - Y) == X --> Y == 0
7078 // (X - Y) != X --> Y != 0
7079 // (X ^ Y) == X --> Y == 0
7080 // (X ^ Y) != X --> Y != 0
7081 Register Dst = MI.getOperand(0).getReg();
7082 CmpInst::Predicate Pred;
7083 Register X, Y, OpLHS, OpRHS;
7084 bool MatchedSub = mi_match(
7085 Dst, MRI,
7086 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
7087 if (MatchedSub && X != OpLHS)
7088 return false;
7089 if (!MatchedSub) {
7090 if (!mi_match(Dst, MRI,
7091 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7092 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7093 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7094 return false;
7095 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7096 }
7097 MatchInfo = [=](MachineIRBuilder &B) {
7098 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7099 B.buildICmp(Pred, Dst, Y, Zero);
7100 };
7101 return CmpInst::isEquality(Pred) && Y.isValid();
7102}
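// Illustrative example (hypothetical registers):
//   %s:_(s32) = G_ADD %x, %y
//   %c:_(s1) = G_ICMP intpred(eq), %s, %x
// becomes
//   %zero:_(s32) = G_CONSTANT i32 0
//   %c:_(s1) = G_ICMP intpred(eq), %y, %zero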
7103
7104/// Return the minimum shift amount that results in complete loss of the source
7105/// value. \p Result is set to the value such a shift is known to produce, or std::nullopt when it cannot be determined.
7106static std::optional<unsigned>
7107getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7108 std::optional<int64_t> &Result) {
7109 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7110 Opcode == TargetOpcode::G_ASHR) &&
7111 "Expect G_SHL, G_LSHR or G_ASHR.");
7112 auto SignificantBits = 0;
7113 switch (Opcode) {
7114 case TargetOpcode::G_SHL:
7115 SignificantBits = ValueKB.countMinTrailingZeros();
7116 Result = 0;
7117 break;
7118 case TargetOpcode::G_LSHR:
7119 Result = 0;
7120 SignificantBits = ValueKB.countMinLeadingZeros();
7121 break;
7122 case TargetOpcode::G_ASHR:
7123 if (ValueKB.isNonNegative()) {
7124 SignificantBits = ValueKB.countMinLeadingZeros();
7125 Result = 0;
7126 } else if (ValueKB.isNegative()) {
7127 SignificantBits = ValueKB.countMinLeadingOnes();
7128 Result = -1;
7129 } else {
7130 // Cannot determine shift result.
7131 Result = std::nullopt;
7132 }
7133 break;
7134 default:
7135 break;
7136 }
7137 return ValueKB.getBitWidth() - SignificantBits;
7138}
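// Worked example: if an s32 value fed to G_SHL is known to be a multiple of 16
// (countMinTrailingZeros() == 4), only 32 - 4 = 28 significant bits remain, so
// any shift amount >= 28 is guaranteed to produce 0, the value reported via
// \p Result.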
7139
7141 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
7142 Register ShiftVal = MI.getOperand(1).getReg();
7143 Register ShiftReg = MI.getOperand(2).getReg();
7144 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
7145 auto IsShiftTooBig = [&](const Constant *C) {
7146 auto *CI = dyn_cast<ConstantInt>(C);
7147 if (!CI)
7148 return false;
7149 if (CI->uge(ResTy.getScalarSizeInBits())) {
7150 MatchInfo = std::nullopt;
7151 return true;
7152 }
7153 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7154 MI.getOpcode(), MatchInfo);
7155 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7156 };
7157 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7158}
7159
7161 unsigned LHSOpndIdx = 1;
7162 unsigned RHSOpndIdx = 2;
7163 switch (MI.getOpcode()) {
7164 case TargetOpcode::G_UADDO:
7165 case TargetOpcode::G_SADDO:
7166 case TargetOpcode::G_UMULO:
7167 case TargetOpcode::G_SMULO:
7168 LHSOpndIdx = 2;
7169 RHSOpndIdx = 3;
7170 break;
7171 default:
7172 break;
7173 }
7174 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7175 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7176 if (!getIConstantVRegVal(LHS, MRI)) {
7177 // Skip commuting if LHS is not a constant. But, LHS may be a
7178 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7179 // have a constant on the RHS.
7180 if (MRI.getVRegDef(LHS)->getOpcode() !=
7181 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7182 return false;
7183 }
7184 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7185 return MRI.getVRegDef(RHS)->getOpcode() !=
7186 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7187 !getIConstantVRegVal(RHS, MRI);
7188}
7189
7191 Register LHS = MI.getOperand(1).getReg();
7192 Register RHS = MI.getOperand(2).getReg();
7193 std::optional<FPValueAndVReg> ValAndVReg;
7194 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7195 return false;
7196 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7197}
7198
7200 Observer.changingInstr(MI);
7201 unsigned LHSOpndIdx = 1;
7202 unsigned RHSOpndIdx = 2;
7203 switch (MI.getOpcode()) {
7204 case TargetOpcode::G_UADDO:
7205 case TargetOpcode::G_SADDO:
7206 case TargetOpcode::G_UMULO:
7207 case TargetOpcode::G_SMULO:
7208 LHSOpndIdx = 2;
7209 RHSOpndIdx = 3;
7210 break;
7211 default:
7212 break;
7213 }
7214 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7215 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7216 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7217 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7218 Observer.changedInstr(MI);
7219}
7220
7221bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7222 LLT SrcTy = MRI.getType(Src);
7223 if (SrcTy.isFixedVector())
7224 return isConstantSplatVector(Src, 1, AllowUndefs);
7225 if (SrcTy.isScalar()) {
7226 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7227 return true;
7228 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7229 return IConstant && IConstant->Value == 1;
7230 }
7231 return false; // scalable vector
7232}
7233
7234bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7235 LLT SrcTy = MRI.getType(Src);
7236 if (SrcTy.isFixedVector())
7237 return isConstantSplatVector(Src, 0, AllowUndefs);
7238 if (SrcTy.isScalar()) {
7239 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7240 return true;
7241 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7242 return IConstant && IConstant->Value == 0;
7243 }
7244 return false; // scalable vector
7245}
7246
7247// Ignores COPYs during conformance checks.
7248// FIXME scalable vectors.
7249bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7250 bool AllowUndefs) const {
7251 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7252 if (!BuildVector)
7253 return false;
7254 unsigned NumSources = BuildVector->getNumSources();
7255
7256 for (unsigned I = 0; I < NumSources; ++I) {
7257 GImplicitDef *ImplicitDef =
7259 if (ImplicitDef && AllowUndefs)
7260 continue;
7261 if (ImplicitDef && !AllowUndefs)
7262 return false;
7263 std::optional<ValueAndVReg> IConstant =
7265 if (IConstant && IConstant->Value == SplatValue)
7266 continue;
7267 return false;
7268 }
7269 return true;
7270}
7271
7272// Ignores COPYs during lookups.
7273// FIXME scalable vectors
7274std::optional<APInt>
7275CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7276 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7277 if (IConstant)
7278 return IConstant->Value;
7279
7280 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7281 if (!BuildVector)
7282 return std::nullopt;
7283 unsigned NumSources = BuildVector->getNumSources();
7284
7285 std::optional<APInt> Value = std::nullopt;
7286 for (unsigned I = 0; I < NumSources; ++I) {
7287 std::optional<ValueAndVReg> IConstant =
7289 if (!IConstant)
7290 return std::nullopt;
7291 if (!Value)
7292 Value = IConstant->Value;
7293 else if (*Value != IConstant->Value)
7294 return std::nullopt;
7295 }
7296 return Value;
7297}
7298
7299// FIXME G_SPLAT_VECTOR
7300bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7301 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7302 if (IConstant)
7303 return true;
7304
7305 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7306 if (!BuildVector)
7307 return false;
7308
7309 unsigned NumSources = BuildVector->getNumSources();
7310 for (unsigned I = 0; I < NumSources; ++I) {
7311 std::optional<ValueAndVReg> IConstant =
7313 if (!IConstant)
7314 return false;
7315 }
7316 return true;
7317}
7318
7319// TODO: use knownbits to determine zeros
7320bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7321 BuildFnTy &MatchInfo) const {
7322 uint32_t Flags = Select->getFlags();
7323 Register Dest = Select->getReg(0);
7324 Register Cond = Select->getCondReg();
7325 Register True = Select->getTrueReg();
7326 Register False = Select->getFalseReg();
7327 LLT CondTy = MRI.getType(Select->getCondReg());
7328 LLT TrueTy = MRI.getType(Select->getTrueReg());
7329
7330 // We only do this combine for scalar boolean conditions.
7331 if (CondTy != LLT::scalar(1))
7332 return false;
7333
7334 if (TrueTy.isPointer())
7335 return false;
7336
7337 // Both are scalars.
7338 std::optional<ValueAndVReg> TrueOpt =
7340 std::optional<ValueAndVReg> FalseOpt =
7342
7343 if (!TrueOpt || !FalseOpt)
7344 return false;
7345
7346 APInt TrueValue = TrueOpt->Value;
7347 APInt FalseValue = FalseOpt->Value;
7348
7349 // select Cond, 1, 0 --> zext (Cond)
7350 if (TrueValue.isOne() && FalseValue.isZero()) {
7351 MatchInfo = [=](MachineIRBuilder &B) {
7352 B.setInstrAndDebugLoc(*Select);
7353 B.buildZExtOrTrunc(Dest, Cond);
7354 };
7355 return true;
7356 }
7357
7358 // select Cond, -1, 0 --> sext (Cond)
7359 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7360 MatchInfo = [=](MachineIRBuilder &B) {
7361 B.setInstrAndDebugLoc(*Select);
7362 B.buildSExtOrTrunc(Dest, Cond);
7363 };
7364 return true;
7365 }
7366
7367 // select Cond, 0, 1 --> zext (!Cond)
7368 if (TrueValue.isZero() && FalseValue.isOne()) {
7369 MatchInfo = [=](MachineIRBuilder &B) {
7370 B.setInstrAndDebugLoc(*Select);
7371 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7372 B.buildNot(Inner, Cond);
7373 B.buildZExtOrTrunc(Dest, Inner);
7374 };
7375 return true;
7376 }
7377
7378 // select Cond, 0, -1 --> sext (!Cond)
7379 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7380 MatchInfo = [=](MachineIRBuilder &B) {
7381 B.setInstrAndDebugLoc(*Select);
7382 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7383 B.buildNot(Inner, Cond);
7384 B.buildSExtOrTrunc(Dest, Inner);
7385 };
7386 return true;
7387 }
7388
7389 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7390 if (TrueValue - 1 == FalseValue) {
7391 MatchInfo = [=](MachineIRBuilder &B) {
7392 B.setInstrAndDebugLoc(*Select);
7393 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7394 B.buildZExtOrTrunc(Inner, Cond);
7395 B.buildAdd(Dest, Inner, False);
7396 };
7397 return true;
7398 }
7399
7400 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7401 if (TrueValue + 1 == FalseValue) {
7402 MatchInfo = [=](MachineIRBuilder &B) {
7403 B.setInstrAndDebugLoc(*Select);
7404 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7405 B.buildSExtOrTrunc(Inner, Cond);
7406 B.buildAdd(Dest, Inner, False);
7407 };
7408 return true;
7409 }
7410
7411 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7412 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7413 MatchInfo = [=](MachineIRBuilder &B) {
7414 B.setInstrAndDebugLoc(*Select);
7415 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7416 B.buildZExtOrTrunc(Inner, Cond);
7417 // The shift amount must be scalar.
7418 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7419 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
7420 B.buildShl(Dest, Inner, ShAmtC, Flags);
7421 };
7422 return true;
7423 }
7424
7425 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7426 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7427 MatchInfo = [=](MachineIRBuilder &B) {
7428 B.setInstrAndDebugLoc(*Select);
7429 Register Not = MRI.createGenericVirtualRegister(CondTy);
7430 B.buildNot(Not, Cond);
7431 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7432 B.buildZExtOrTrunc(Inner, Not);
7433 // The shift amount must be scalar.
7434 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7435 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7436 B.buildShl(Dest, Inner, ShAmtC, Flags);
7437 };
7438 return true;
7439 }
7440
7441 // select Cond, -1, C --> or (sext Cond), C
7442 if (TrueValue.isAllOnes()) {
7443 MatchInfo = [=](MachineIRBuilder &B) {
7444 B.setInstrAndDebugLoc(*Select);
7445 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7446 B.buildSExtOrTrunc(Inner, Cond);
7447 B.buildOr(Dest, Inner, False, Flags);
7448 };
7449 return true;
7450 }
7451
7452 // select Cond, C, -1 --> or (sext (not Cond)), C
7453 if (FalseValue.isAllOnes()) {
7454 MatchInfo = [=](MachineIRBuilder &B) {
7455 B.setInstrAndDebugLoc(*Select);
7456 Register Not = MRI.createGenericVirtualRegister(CondTy);
7457 B.buildNot(Not, Cond);
7458 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7459 B.buildSExtOrTrunc(Inner, Not);
7460 B.buildOr(Dest, Inner, True, Flags);
7461 };
7462 return true;
7463 }
7464
7465 return false;
7466}
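// Illustrative example of the power-of-two case above (hypothetical
// registers, constants shown inline):
//   %r:_(s32) = G_SELECT %cond:_(s1), 8, 0
// becomes
//   %ext:_(s32) = G_ZEXT %cond
//   %shamt:_(s32) = G_CONSTANT i32 3
//   %r:_(s32) = G_SHL %ext, %shamt
// i.e. the select is rewritten as (zext Cond) << log2(8).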
7467
7468// TODO: use knownbits to determine zeros
7469bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7470 BuildFnTy &MatchInfo) const {
7471 uint32_t Flags = Select->getFlags();
7472 Register DstReg = Select->getReg(0);
7473 Register Cond = Select->getCondReg();
7474 Register True = Select->getTrueReg();
7475 Register False = Select->getFalseReg();
7476 LLT CondTy = MRI.getType(Select->getCondReg());
7477 LLT TrueTy = MRI.getType(Select->getTrueReg());
7478
7479 // Boolean or fixed vector of booleans.
7480 if (CondTy.isScalableVector() ||
7481 (CondTy.isFixedVector() &&
7482 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7483 CondTy.getScalarSizeInBits() != 1)
7484 return false;
7485
7486 if (CondTy != TrueTy)
7487 return false;
7488
7489 // select Cond, Cond, F --> or Cond, F
7490 // select Cond, 1, F --> or Cond, F
7491 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7492 MatchInfo = [=](MachineIRBuilder &B) {
7493 B.setInstrAndDebugLoc(*Select);
7494 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7495 B.buildZExtOrTrunc(Ext, Cond);
7496 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7497 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7498 };
7499 return true;
7500 }
7501
7502 // select Cond, T, Cond --> and Cond, T
7503 // select Cond, T, 0 --> and Cond, T
7504 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7505 MatchInfo = [=](MachineIRBuilder &B) {
7506 B.setInstrAndDebugLoc(*Select);
7507 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7508 B.buildZExtOrTrunc(Ext, Cond);
7509 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7510 B.buildAnd(DstReg, Ext, FreezeTrue);
7511 };
7512 return true;
7513 }
7514
7515 // select Cond, T, 1 --> or (not Cond), T
7516 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7517 MatchInfo = [=](MachineIRBuilder &B) {
7518 B.setInstrAndDebugLoc(*Select);
7519 // First the not.
7520 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7521 B.buildNot(Inner, Cond);
7522 // Then an ext to match the destination register.
7523 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7524 B.buildZExtOrTrunc(Ext, Inner);
7525 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7526 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7527 };
7528 return true;
7529 }
7530
7531 // select Cond, 0, F --> and (not Cond), F
7532 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7533 MatchInfo = [=](MachineIRBuilder &B) {
7534 B.setInstrAndDebugLoc(*Select);
7535 // First the not.
7536 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7537 B.buildNot(Inner, Cond);
7538 // Then an ext to match the destination register.
7539 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7540 B.buildZExtOrTrunc(Ext, Inner);
7541 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7542 B.buildAnd(DstReg, Ext, FreezeFalse);
7543 };
7544 return true;
7545 }
7546
7547 return false;
7548}
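// Illustrative example (hypothetical s1 registers): for
//   %r:_(s1) = G_SELECT %c, %t, %zero
// with %zero known to be 0, the fold above produces
//   %ft:_(s1) = G_FREEZE %t
//   %r:_(s1) = G_AND %c, %ft
// (the zext of the s1 condition is a no-op at this width; the freeze keeps the
// transform sound if %t is poison).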
7549
7551 BuildFnTy &MatchInfo) const {
7552 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7553 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7554
7555 Register DstReg = Select->getReg(0);
7556 Register True = Select->getTrueReg();
7557 Register False = Select->getFalseReg();
7558 LLT DstTy = MRI.getType(DstReg);
7559
7560 if (DstTy.isPointerOrPointerVector())
7561 return false;
7562
7563 // We want to fold the icmp and replace the select.
7564 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7565 return false;
7566
7567 CmpInst::Predicate Pred = Cmp->getCond();
7568 // We need a larger or smaller predicate for
7569 // canonicalization.
7570 if (CmpInst::isEquality(Pred))
7571 return false;
7572
7573 Register CmpLHS = Cmp->getLHSReg();
7574 Register CmpRHS = Cmp->getRHSReg();
7575
7576 // We can swap CmpLHS and CmpRHS for a higher hit rate.
7577 if (True == CmpRHS && False == CmpLHS) {
7578 std::swap(CmpLHS, CmpRHS);
7579 Pred = CmpInst::getSwappedPredicate(Pred);
7580 }
7581
7582 // (icmp X, Y) ? X : Y -> integer min/max.
7583 // See matchSelectPattern in ValueTracking.
7584 // The legality of G_SELECT and of the integer min/max opcodes can differ.
7585 if (True != CmpLHS || False != CmpRHS)
7586 return false;
7587
7588 switch (Pred) {
7589 case ICmpInst::ICMP_UGT:
7590 case ICmpInst::ICMP_UGE: {
7591 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7592 return false;
7593 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7594 return true;
7595 }
7596 case ICmpInst::ICMP_SGT:
7597 case ICmpInst::ICMP_SGE: {
7598 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7599 return false;
7600 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7601 return true;
7602 }
7603 case ICmpInst::ICMP_ULT:
7604 case ICmpInst::ICMP_ULE: {
7605 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7606 return false;
7607 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7608 return true;
7609 }
7610 case ICmpInst::ICMP_SLT:
7611 case ICmpInst::ICMP_SLE: {
7612 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7613 return false;
7614 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7615 return true;
7616 }
7617 default:
7618 return false;
7619 }
7620}
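// Illustrative example (hypothetical registers):
//   %c:_(s1) = G_ICMP intpred(ugt), %x, %y
//   %r:_(s32) = G_SELECT %c, %x, %y
// becomes, when G_UMAX is legal (or we are before the legalizer),
//   %r:_(s32) = G_UMAX %x, %y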
7621
7622// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
7624 BuildFnTy &MatchInfo) const {
7625 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7626 Register DestReg = MI.getOperand(0).getReg();
7627 LLT DestTy = MRI.getType(DestReg);
7628
7629 Register X;
7630 Register Sub0;
7631 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7632 if (mi_match(DestReg, MRI,
7633 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7634 m_GSMax(m_Reg(X), NegPattern),
7635 m_GUMin(m_Reg(X), NegPattern),
7636 m_GUMax(m_Reg(X), NegPattern)))))) {
7637 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7638 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7639 if (isLegal({NewOpc, {DestTy}})) {
7640 MatchInfo = [=](MachineIRBuilder &B) {
7641 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7642 };
7643 return true;
7644 }
7645 }
7646
7647 return false;
7648}
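// Worked identity for the fold above: -(smin(x, -x)) == smax(x, -x), and
// likewise for the other min/max flavours, so the outer negation disappears by
// swapping min for max (or max for min) and reusing the already-negated
// operand Sub0.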
7649
7652
7653 if (tryFoldSelectOfConstants(Select, MatchInfo))
7654 return true;
7655
7656 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7657 return true;
7658
7659 return false;
7660}
7661
7662/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7663/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7664/// into a single comparison using range-based reasoning.
7665/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
7666bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7667 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7668 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7669 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7670 Register DstReg = Logic->getReg(0);
7671 Register LHS = Logic->getLHSReg();
7672 Register RHS = Logic->getRHSReg();
7673 unsigned Flags = Logic->getFlags();
7674
7675 // We need a G_ICMP on the LHS register.
7676 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7677 if (!Cmp1)
7678 return false;
7679
7680 // We need a G_ICMP on the RHS register.
7681 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7682 if (!Cmp2)
7683 return false;
7684
7685 // We want to fold the icmps.
7686 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7687 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7688 return false;
7689
7690 APInt C1;
7691 APInt C2;
7692 std::optional<ValueAndVReg> MaybeC1 =
7694 if (!MaybeC1)
7695 return false;
7696 C1 = MaybeC1->Value;
7697
7698 std::optional<ValueAndVReg> MaybeC2 =
7700 if (!MaybeC2)
7701 return false;
7702 C2 = MaybeC2->Value;
7703
7704 Register R1 = Cmp1->getLHSReg();
7705 Register R2 = Cmp2->getLHSReg();
7706 CmpInst::Predicate Pred1 = Cmp1->getCond();
7707 CmpInst::Predicate Pred2 = Cmp2->getCond();
7708 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7709 LLT CmpOperandTy = MRI.getType(R1);
7710
7711 if (CmpOperandTy.isPointer())
7712 return false;
7713
7714 // We build ands, adds, and constants of type CmpOperandTy.
7715 // They must be legal to build.
7716 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7717 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7718 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7719 return false;
7720
7721 // Look through an add of a constant offset on R1, R2, or both operands. This
7722 // allows us to turn the R + C' < C'' range idiom into a proper range.
7723 std::optional<APInt> Offset1;
7724 std::optional<APInt> Offset2;
7725 if (R1 != R2) {
7726 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7727 std::optional<ValueAndVReg> MaybeOffset1 =
7729 if (MaybeOffset1) {
7730 R1 = Add->getLHSReg();
7731 Offset1 = MaybeOffset1->Value;
7732 }
7733 }
7734 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7735 std::optional<ValueAndVReg> MaybeOffset2 =
7737 if (MaybeOffset2) {
7738 R2 = Add->getLHSReg();
7739 Offset2 = MaybeOffset2->Value;
7740 }
7741 }
7742 }
7743
7744 if (R1 != R2)
7745 return false;
7746
7747 // We calculate the icmp ranges including maybe offsets.
7748 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7749 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7750 if (Offset1)
7751 CR1 = CR1.subtract(*Offset1);
7752
7753 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7754 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7755 if (Offset2)
7756 CR2 = CR2.subtract(*Offset2);
7757
7758 bool CreateMask = false;
7759 APInt LowerDiff;
7760 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7761 if (!CR) {
7762 // We need non-wrapping ranges.
7763 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7764 return false;
7765
7766 // Check whether we have equal-size ranges that only differ by one bit.
7767 // In that case we can apply a mask to map one range onto the other.
7768 LowerDiff = CR1.getLower() ^ CR2.getLower();
7769 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7770 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7771 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7772 CR1Size != CR2.getUpper() - CR2.getLower())
7773 return false;
7774
7775 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7776 CreateMask = true;
7777 }
7778
7779 if (IsAnd)
7780 CR = CR->inverse();
7781
7782 CmpInst::Predicate NewPred;
7783 APInt NewC, Offset;
7784 CR->getEquivalentICmp(NewPred, NewC, Offset);
7785
7786 // We take the result type of one of the original icmps, CmpTy, for the
7787 // icmp to be built. The operand type, CmpOperandTy, is used for the other
7788 // instructions and constants to be built. The parameter and result types
7789 // are the same for G_ADD and G_AND. CmpTy and the type of DstReg might
7790 // differ, which is why we zext or trunc the icmp into the destination
7791 // register.
7792
7793 MatchInfo = [=](MachineIRBuilder &B) {
7794 if (CreateMask && Offset != 0) {
7795 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7796 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7797 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7798 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7799 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7800 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7801 B.buildZExtOrTrunc(DstReg, ICmp);
7802 } else if (CreateMask && Offset == 0) {
7803 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7804 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7805 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7806 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7807 B.buildZExtOrTrunc(DstReg, ICmp);
7808 } else if (!CreateMask && Offset != 0) {
7809 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7810 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7811 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7812 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7813 B.buildZExtOrTrunc(DstReg, ICmp);
7814 } else if (!CreateMask && Offset == 0) {
7815 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7816 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7817 B.buildZExtOrTrunc(DstReg, ICmp);
7818 } else {
7819 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7820 }
7821 };
7822 return true;
7823}
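// Worked example (hypothetical registers): folding
//   (icmp uge %x, 5) && (icmp ult %x, 10)
// inverts both predicates (because IsAnd), giving the ranges [0, 5) and
// [10, UINT_MAX]; their exact union is inverted back to [5, 10), whose
// equivalent icmp is emitted as
//   %off:_(s32) = G_CONSTANT i32 -5
//   %t:_(s32) = G_ADD %x, %off
//   %five:_(s32) = G_CONSTANT i32 5
//   %c:_(s1) = G_ICMP intpred(ult), %t, %five
// and then zext/trunc'd into the original destination register.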
7824
7825bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7826 BuildFnTy &MatchInfo) const {
7827 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7828 Register DestReg = Logic->getReg(0);
7829 Register LHS = Logic->getLHSReg();
7830 Register RHS = Logic->getRHSReg();
7831 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7832
7833 // We need a compare on the LHS register.
7834 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7835 if (!Cmp1)
7836 return false;
7837
7838 // We need a compare on the RHS register.
7839 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7840 if (!Cmp2)
7841 return false;
7842
7843 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7844 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7845
7846 // We build a single fcmp as the replacement: it must be legal, the logic op
7847 // and both fcmps must have one use each, and the fcmps must have the same operand types.
7849 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7850 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7851 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7852 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7853 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7854 return false;
7855
7856 CmpInst::Predicate PredL = Cmp1->getCond();
7857 CmpInst::Predicate PredR = Cmp2->getCond();
7858 Register LHS0 = Cmp1->getLHSReg();
7859 Register LHS1 = Cmp1->getRHSReg();
7860 Register RHS0 = Cmp2->getLHSReg();
7861 Register RHS1 = Cmp2->getRHSReg();
7862
7863 if (LHS0 == RHS1 && LHS1 == RHS0) {
7864 // Swap RHS operands to match LHS.
7865 PredR = CmpInst::getSwappedPredicate(PredR);
7866 std::swap(RHS0, RHS1);
7867 }
7868
7869 if (LHS0 == RHS0 && LHS1 == RHS1) {
7870 // We determine the new predicate.
7871 unsigned CmpCodeL = getFCmpCode(PredL);
7872 unsigned CmpCodeR = getFCmpCode(PredR);
7873 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7874 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7875 MatchInfo = [=](MachineIRBuilder &B) {
7876 // The fcmp predicates fill the lower part of the enum.
7877 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7878 if (Pred == FCmpInst::FCMP_FALSE &&
7880 auto False = B.buildConstant(CmpTy, 0);
7881 B.buildZExtOrTrunc(DestReg, False);
7882 } else if (Pred == FCmpInst::FCMP_TRUE &&
7884 auto True =
7885 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7886 CmpTy.isVector() /*isVector*/,
7887 true /*isFP*/));
7888 B.buildZExtOrTrunc(DestReg, True);
7889 } else { // We take the predicate without predicate optimizations.
7890 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7891 B.buildZExtOrTrunc(DestReg, Cmp);
7892 }
7893 };
7894 return true;
7895 }
7896
7897 return false;
7898}
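// Illustrative example (hypothetical registers):
//   %a:_(s1) = G_FCMP floatpred(olt), %x, %y
//   %b:_(s1) = G_FCMP floatpred(ogt), %x, %y
//   %r:_(s1) = G_OR %a, %b
// becomes a single compare whose predicate is the bitwise OR of the two fcmp
// codes (olt | ogt == one):
//   %r:_(s1) = G_FCMP floatpred(one), %x, %y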
7899
7901 GAnd *And = cast<GAnd>(&MI);
7902
7903 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7904 return true;
7905
7906 if (tryFoldLogicOfFCmps(And, MatchInfo))
7907 return true;
7908
7909 return false;
7910}
7911
7913 GOr *Or = cast<GOr>(&MI);
7914
7915 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7916 return true;
7917
7918 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7919 return true;
7920
7921 return false;
7922}
7923
7925 BuildFnTy &MatchInfo) const {
7927
7928 // Addo has no flags
7929 Register Dst = Add->getReg(0);
7930 Register Carry = Add->getReg(1);
7931 Register LHS = Add->getLHSReg();
7932 Register RHS = Add->getRHSReg();
7933 bool IsSigned = Add->isSigned();
7934 LLT DstTy = MRI.getType(Dst);
7935 LLT CarryTy = MRI.getType(Carry);
7936
7937 // If the carry is dead, fold addo -> add, undef.
7938 if (MRI.use_nodbg_empty(Carry) &&
7939 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7940 MatchInfo = [=](MachineIRBuilder &B) {
7941 B.buildAdd(Dst, LHS, RHS);
7942 B.buildUndef(Carry);
7943 };
7944 return true;
7945 }
7946
7947 // Canonicalize constant to RHS.
7948 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7949 if (IsSigned) {
7950 MatchInfo = [=](MachineIRBuilder &B) {
7951 B.buildSAddo(Dst, Carry, RHS, LHS);
7952 };
7953 return true;
7954 }
7955 // !IsSigned
7956 MatchInfo = [=](MachineIRBuilder &B) {
7957 B.buildUAddo(Dst, Carry, RHS, LHS);
7958 };
7959 return true;
7960 }
7961
7962 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7963 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7964
7965 // Fold addo(c1, c2) -> c3, carry.
7966 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7968 bool Overflow;
7969 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7970 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7971 MatchInfo = [=](MachineIRBuilder &B) {
7972 B.buildConstant(Dst, Result);
7973 B.buildConstant(Carry, Overflow);
7974 };
7975 return true;
7976 }
7977
7978 // Fold (addo x, 0) -> x, no carry
7979 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7980 MatchInfo = [=](MachineIRBuilder &B) {
7981 B.buildCopy(Dst, LHS);
7982 B.buildConstant(Carry, 0);
7983 };
7984 return true;
7985 }
7986
7987 // Given 2 constant operands whose sum does not overflow:
7988 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7989 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7990 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7991 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7992 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7993 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7994 std::optional<APInt> MaybeAddRHS =
7995 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7996 if (MaybeAddRHS) {
7997 bool Overflow;
7998 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7999 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
8000 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
8001 if (IsSigned) {
8002 MatchInfo = [=](MachineIRBuilder &B) {
8003 auto ConstRHS = B.buildConstant(DstTy, NewC);
8004 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8005 };
8006 return true;
8007 }
8008 // !IsSigned
8009 MatchInfo = [=](MachineIRBuilder &B) {
8010 auto ConstRHS = B.buildConstant(DstTy, NewC);
8011 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8012 };
8013 return true;
8014 }
8015 }
8016 }
8017
8018 // We try to combine addo to non-overflowing add.
8019 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
8021 return false;
8022
8023 // We try to combine uaddo to non-overflowing add.
8024 if (!IsSigned) {
8025 ConstantRange CRLHS =
8026 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
8027 ConstantRange CRRHS =
8028 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
8029
8030 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
8032 return false;
8034 MatchInfo = [=](MachineIRBuilder &B) {
8035 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8036 B.buildConstant(Carry, 0);
8037 };
8038 return true;
8039 }
8042 MatchInfo = [=](MachineIRBuilder &B) {
8043 B.buildAdd(Dst, LHS, RHS);
8044 B.buildConstant(Carry, 1);
8045 };
8046 return true;
8047 }
8048 }
8049 return false;
8050 }
8051
8052 // We try to combine saddo to non-overflowing add.
8053
8054 // If LHS and RHS each have at least two sign bits, then there is no signed
8055 // overflow.
8056 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
8057 MatchInfo = [=](MachineIRBuilder &B) {
8058 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8059 B.buildConstant(Carry, 0);
8060 };
8061 return true;
8062 }
8063
8064 ConstantRange CRLHS =
8065 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
8066 ConstantRange CRRHS =
8067 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
8068
8069 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
8071 return false;
8073 MatchInfo = [=](MachineIRBuilder &B) {
8074 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8075 B.buildConstant(Carry, 0);
8076 };
8077 return true;
8078 }
8081 MatchInfo = [=](MachineIRBuilder &B) {
8082 B.buildAdd(Dst, LHS, RHS);
8083 B.buildConstant(Carry, 1);
8084 };
8085 return true;
8086 }
8087 }
8088
8089 return false;
8090}
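// Illustrative example (hypothetical registers): if known bits prove that both
// operands of an s32 G_UADDO fit in 16 bits, unsignedAddMayOverflow() returns
// NeverOverflows and the match rewrites
//   %d:_(s32), %c:_(s1) = G_UADDO %x, %y
// into
//   %d:_(s32) = nuw G_ADD %x, %y
//   %c:_(s1) = G_CONSTANT i1 0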
8091
8093 BuildFnTy &MatchInfo) const {
8095 MatchInfo(Builder);
8096 Root->eraseFromParent();
8097}
8098
8100 int64_t Exponent) const {
8101 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8103}
8104
8106 int64_t Exponent) const {
8107 auto [Dst, Base] = MI.getFirst2Regs();
8108 LLT Ty = MRI.getType(Dst);
8109 int64_t ExpVal = Exponent;
8110
8111 if (ExpVal == 0) {
8112 Builder.buildFConstant(Dst, 1.0);
8113 MI.removeFromParent();
8114 return;
8115 }
8116
8117 if (ExpVal < 0)
8118 ExpVal = -ExpVal;
8119
8120 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8121 // to generate the multiply sequence. There are more optimal ways to do this
8122 // (for example, powi(x,15) generates one more multiply than it should), but
8123 // this has the benefit of being both really simple and much better than a
8124 // libcall.
8125 std::optional<SrcOp> Res;
8126 SrcOp CurSquare = Base;
8127 while (ExpVal > 0) {
8128 if (ExpVal & 1) {
8129 if (!Res)
8130 Res = CurSquare;
8131 else
8132 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8133 }
8134
8135 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8136 ExpVal >>= 1;
8137 }
8138
8139 // If the original exponent was negative, invert the result, producing
8140 // e.g. 1/(x*x*x) for powi(x, -3).
8141 if (Exponent < 0)
8142 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8143 MI.getFlags());
8144
8145 Builder.buildCopy(Dst, *Res);
8146 MI.eraseFromParent();
8147}
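// Worked example of the binary decomposition above for powi(x, 5) (5 == 0b101):
//   bit 0 set:   Res = x,          CurSquare = x^2
//   bit 1 clear:                   CurSquare = x^4
//   bit 2 set:   Res = x * x^4,    CurSquare = x^8 (dead, cleaned up later)
// so the value is computed with three useful G_FMULs instead of a libcall; a
// negative exponent additionally divides 1.0 by the result.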
8148
8150 BuildFnTy &MatchInfo) const {
8151 // fold (A+C1)-C2 -> A+(C1-C2)
8152 const GSub *Sub = cast<GSub>(&MI);
8153 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8154
8155 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8156 return false;
8157
8158 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8159 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8160
8161 Register Dst = Sub->getReg(0);
8162 LLT DstTy = MRI.getType(Dst);
8163
8164 MatchInfo = [=](MachineIRBuilder &B) {
8165 auto Const = B.buildConstant(DstTy, C1 - C2);
8166 B.buildAdd(Dst, Add->getLHSReg(), Const);
8167 };
8168
8169 return true;
8170}
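// Worked example for the fold above (hypothetical registers, constants shown
// inline):
//   %t:_(s32) = G_ADD %a, 10      ; C1 = 10
//   %d:_(s32) = G_SUB %t, 3       ; C2 = 3
// becomes
//   %k:_(s32) = G_CONSTANT i32 7  ; C1 - C2
//   %d:_(s32) = G_ADD %a, %k
// The matchers below apply the same constant-reassociation idea to the
// remaining C2-(A+C1), (A-C1)-C2, (C1-A)-C2 and (A-C1)+C2 shapes.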
8171
8173 BuildFnTy &MatchInfo) const {
8174 // fold C2-(A+C1) -> (C2-C1)-A
8175 const GSub *Sub = cast<GSub>(&MI);
8176 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8177
8178 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8179 return false;
8180
8181 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8182 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8183
8184 Register Dst = Sub->getReg(0);
8185 LLT DstTy = MRI.getType(Dst);
8186
8187 MatchInfo = [=](MachineIRBuilder &B) {
8188 auto Const = B.buildConstant(DstTy, C2 - C1);
8189 B.buildSub(Dst, Const, Add->getLHSReg());
8190 };
8191
8192 return true;
8193}
8194
8196 BuildFnTy &MatchInfo) const {
8197 // fold (A-C1)-C2 -> A-(C1+C2)
8198 const GSub *Sub1 = cast<GSub>(&MI);
8199 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8200
8201 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8202 return false;
8203
8204 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8205 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8206
8207 Register Dst = Sub1->getReg(0);
8208 LLT DstTy = MRI.getType(Dst);
8209
8210 MatchInfo = [=](MachineIRBuilder &B) {
8211 auto Const = B.buildConstant(DstTy, C1 + C2);
8212 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8213 };
8214
8215 return true;
8216}
8217
8219 BuildFnTy &MatchInfo) const {
8220 // fold (C1-A)-C2 -> (C1-C2)-A
8221 const GSub *Sub1 = cast<GSub>(&MI);
8222 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8223
8224 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8225 return false;
8226
8227 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8228 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8229
8230 Register Dst = Sub1->getReg(0);
8231 LLT DstTy = MRI.getType(Dst);
8232
8233 MatchInfo = [=](MachineIRBuilder &B) {
8234 auto Const = B.buildConstant(DstTy, C1 - C2);
8235 B.buildSub(Dst, Const, Sub2->getRHSReg());
8236 };
8237
8238 return true;
8239}
8240
8242 BuildFnTy &MatchInfo) const {
8243 // fold ((A-C1)+C2) -> (A+(C2-C1))
8244 const GAdd *Add = cast<GAdd>(&MI);
8245 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8246
8247 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8248 return false;
8249
8250 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8251 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8252
8253 Register Dst = Add->getReg(0);
8254 LLT DstTy = MRI.getType(Dst);
8255
8256 MatchInfo = [=](MachineIRBuilder &B) {
8257 auto Const = B.buildConstant(DstTy, C2 - C1);
8258 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8259 };
8260
8261 return true;
8262}
8263
8265 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
8266 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8267
8268 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8269 return false;
8270
8271 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8272
8273 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8274
8275 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8276 // $any:_(<8 x s16>) = G_ANYEXT $bv
8277 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8278 //
8279 // ->
8280 //
8281 // $any:_(s16) = G_ANYEXT $bv[0]
8282 // $any1:_(s16) = G_ANYEXT $bv[1]
8283 // $any2:_(s16) = G_ANYEXT $bv[2]
8284 // $any3:_(s16) = G_ANYEXT $bv[3]
8285 // $any4:_(s16) = G_ANYEXT $bv[4]
8286 // $any5:_(s16) = G_ANYEXT $bv[5]
8287 // $any6:_(s16) = G_ANYEXT $bv[6]
8288 // $any7:_(s16) = G_ANYEXT $bv[7]
8289 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8290 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8291
8292 // We want to unmerge into vectors.
8293 if (!DstTy.isFixedVector())
8294 return false;
8295
8296 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8297 if (!Any)
8298 return false;
8299
8300 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8301
8302 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8303 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8304
8305 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8306 return false;
8307
8308 // FIXME: check element types?
8309 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8310 return false;
8311
8312 LLT BigBvTy = MRI.getType(BV->getReg(0));
8313 LLT SmallBvTy = DstTy;
8314 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8315
8317 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8318 return false;
8319
8320 // We check the legality of scalar anyext.
8322 {TargetOpcode::G_ANYEXT,
8323 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8324 return false;
8325
8326 MatchInfo = [=](MachineIRBuilder &B) {
8327 // Build a small build vector into each G_UNMERGE_VALUES def, using
8328 // anyexts of the corresponding elements of the source build vector.
8329 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
8331 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
8332 Register SourceArray =
8333 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8334 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8335 Ops.push_back(AnyExt.getReg(0));
8336 }
8337 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8338 }
8339 };
8340 return true;
8341 }
8342
8343 return false;
8344}
8345
8346 bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI,
8347                                           BuildFnTy &MatchInfo) const {
8348
8349 bool Changed = false;
8350 auto &Shuffle = cast<GShuffleVector>(MI);
8351 ArrayRef<int> OrigMask = Shuffle.getMask();
8352 SmallVector<int, 16> NewMask;
8353 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
8354 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8355 const unsigned NumDstElts = OrigMask.size();
8356 for (unsigned i = 0; i != NumDstElts; ++i) {
8357 int Idx = OrigMask[i];
8358 if (Idx >= (int)NumSrcElems) {
8359 Idx = -1;
8360 Changed = true;
8361 }
8362 NewMask.push_back(Idx);
8363 }
8364
8365 if (!Changed)
8366 return false;
8367
8368 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8369 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8370 std::move(NewMask));
8371 };
8372
8373 return true;
8374}
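// For illustration only (hypothetical <4 x s32> shuffle): with NumSrcElems = 4,
// a mask of [0, 5, 2, 7] references the undef RHS at positions 1 and 3, so it
// is rewritten to [0, -1, 2, -1] and the shuffle is rebuilt with the same
// operands and the new mask.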
8375
8376static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8377 const unsigned MaskSize = Mask.size();
8378 for (unsigned I = 0; I < MaskSize; ++I) {
8379 int Idx = Mask[I];
8380 if (Idx < 0)
8381 continue;
8382
8383 if (Idx < (int)NumElems)
8384 Mask[I] = Idx + NumElems;
8385 else
8386 Mask[I] = Idx - NumElems;
8387 }
8388}
8389
8390 bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
8391                                               BuildFnTy &MatchInfo) const {
8392
8393 auto &Shuffle = cast<GShuffleVector>(MI);
8394 // If any of the two inputs is already undef, don't check the mask again to
8395 // prevent infinite loop
8396 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8397 return false;
8398
8399 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8400 return false;
8401
8402 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8403 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
8404   if (!isLegalOrBeforeLegalizer(
8405           {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8406 return false;
8407
8408 ArrayRef<int> Mask = Shuffle.getMask();
8409 const unsigned NumSrcElems = Src1Ty.getNumElements();
8410
8411 bool TouchesSrc1 = false;
8412 bool TouchesSrc2 = false;
8413 const unsigned NumElems = Mask.size();
8414 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8415 if (Mask[Idx] < 0)
8416 continue;
8417
8418 if (Mask[Idx] < (int)NumSrcElems)
8419 TouchesSrc1 = true;
8420 else
8421 TouchesSrc2 = true;
8422 }
8423
8424 if (TouchesSrc1 == TouchesSrc2)
8425 return false;
8426
8427 Register NewSrc1 = Shuffle.getSrc1Reg();
8428 SmallVector<int, 16> NewMask(Mask);
8429 if (TouchesSrc2) {
8430 NewSrc1 = Shuffle.getSrc2Reg();
8431 commuteMask(NewMask, NumSrcElems);
8432 }
8433
8434 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8435 auto Undef = B.buildUndef(Src1Ty);
8436 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8437 };
8438
8439 return true;
8440}
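// For illustration only (hypothetical <4 x s32> shuffle of %a and %b): with
// NumSrcElems = 4 and mask [4, 6, 5, 7], only %b is referenced, so commuteMask
// turns the mask into [0, 2, 1, 3] and the shuffle is rebuilt as
// G_SHUFFLE_VECTOR %b, undef, [0, 2, 1, 3].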
8441
8442 bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
8443                                        BuildFnTy &MatchInfo) const {
8444 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8445
8446 Register Dst = Subo->getReg(0);
8447 Register LHS = Subo->getLHSReg();
8448 Register RHS = Subo->getRHSReg();
8449 Register Carry = Subo->getCarryOutReg();
8450 LLT DstTy = MRI.getType(Dst);
8451 LLT CarryTy = MRI.getType(Carry);
8452
8453 // Check legality before known bits.
8454 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8455       !isConstantLegalOrBeforeLegalizer(CarryTy))
8456     return false;
8457
8458 ConstantRange KBLHS =
8459 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8460 /* IsSigned=*/Subo->isSigned());
8461 ConstantRange KBRHS =
8462 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8463 /* IsSigned=*/Subo->isSigned());
8464
8465 if (Subo->isSigned()) {
8466 // G_SSUBO
8467 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
8468     case ConstantRange::OverflowResult::MayOverflow:
8469       return false;
8470     case ConstantRange::OverflowResult::NeverOverflows: {
8471       MatchInfo = [=](MachineIRBuilder &B) {
8472 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8473 B.buildConstant(Carry, 0);
8474 };
8475 return true;
8476 }
8477     case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8478     case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8479       MatchInfo = [=](MachineIRBuilder &B) {
8480 B.buildSub(Dst, LHS, RHS);
8481 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8482 /*isVector=*/CarryTy.isVector(),
8483 /*isFP=*/false));
8484 };
8485 return true;
8486 }
8487 }
8488 return false;
8489 }
8490
8491 // G_USUBO
8492 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8493   case ConstantRange::OverflowResult::MayOverflow:
8494     return false;
8495   case ConstantRange::OverflowResult::NeverOverflows: {
8496     MatchInfo = [=](MachineIRBuilder &B) {
8497 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8498 B.buildConstant(Carry, 0);
8499 };
8500 return true;
8501 }
8502   case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8503   case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8504     MatchInfo = [=](MachineIRBuilder &B) {
8505 B.buildSub(Dst, LHS, RHS);
8506 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8507 /*isVector=*/CarryTy.isVector(),
8508 /*isFP=*/false));
8509 };
8510 return true;
8511 }
8512 }
8513
8514 return false;
8515}
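// For illustration only (hypothetical G_USUBO on s8 operands): if value
// tracking proves the top bit of LHS is set (LHS in [0x80, 0xFF]) and the top
// bit of RHS is clear (RHS in [0x00, 0x7F]), unsignedSubMayOverflow returns
// NeverOverflows, so the G_USUBO above is replaced by a G_SUB with the nuw
// flag and a constant-0 carry-out.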
8516
8517// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
8518 // Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1)) -> (ctls x)
8519 bool CombinerHelper::matchCtls(MachineInstr &CtlzMI,
8520                                BuildFnTy &MatchInfo) const {
8521 assert((CtlzMI.getOpcode() == TargetOpcode::G_CTLZ ||
8522 CtlzMI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) &&
8523 "Expected G_CTLZ variant");
8524
8525 const Register Dst = CtlzMI.getOperand(0).getReg();
8526 Register Src = CtlzMI.getOperand(1).getReg();
8527
8528 LLT Ty = MRI.getType(Dst);
8529 LLT SrcTy = MRI.getType(Src);
8530
8531 if (!(Ty.isValid() && Ty.isScalar()))
8532 return false;
8533
8534 if (!LI)
8535 return false;
8536
8537 SmallVector<LLT, 2> QueryTypes = {Ty, SrcTy};
8538 LegalityQuery Query(TargetOpcode::G_CTLS, QueryTypes);
8539
8540 switch (LI->getAction(Query).Action) {
8541 default:
8542 return false;
8543   case LegalizeActions::Legal:
8544   case LegalizeActions::WidenScalar:
8545   case LegalizeActions::Custom:
8546     break;
8547 }
8548
8549 // Src = or(shl(V, 1), 1) -> Src=V; NeedAdd = False
8550 Register V;
8551 bool NeedAdd = true;
8552 if (mi_match(Src, MRI,
8553                m_OneNonDBGUse(m_GOr(m_GShl(m_Reg(V), m_SpecificICst(1)),
8554                                     m_SpecificICst(1))))) {
8555 NeedAdd = false;
8556 Src = V;
8557 }
8558
8559 unsigned BitWidth = Ty.getScalarSizeInBits();
8560
8561 Register X;
8562 if (!mi_match(Src, MRI,
8563                 m_OneNonDBGUse(m_GXor(m_Reg(X),
8564                        m_OneNonDBGUse(m_GAShr(m_DeferredReg(X),
8565                                               m_SpecificICst(BitWidth - 1)))))))
8566 return false;
8567
8568 MatchInfo = [=](MachineIRBuilder &B) {
8569 if (!NeedAdd) {
8570 B.buildCTLS(Dst, X);
8571 return;
8572 }
8573
8574 auto Ctls = B.buildCTLS(Ty, X);
8575 auto One = B.buildConstant(Ty, 1);
8576
8577 B.buildAdd(Dst, Ctls, One);
8578 };
8579
8580 return true;
8581}
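// For illustration only (hypothetical s8 value): for x = 0b11110101,
// sra(x, 7) = 0b11111111 and x ^ sra(x, 7) = 0b00001010, whose ctlz is 4.
// Per the first fold above this equals G_CTLS(x) + 1, i.e. G_CTLS(x) = 3,
// matching the three redundant sign bits that follow the sign bit of x.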
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
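A hedged sketch of the MIPatternMatch combinators listed above: match dst = G_ADD (G_ZEXT x), cst and bind x and the constant. This is an illustrative pattern, not one of the combines in this file.
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
static bool matchZExtPlusCst(llvm::Register Dst,
                             const llvm::MachineRegisterInfo &MRI,
                             llvm::Register &Src, llvm::APInt &Cst) {
  using namespace llvm::MIPatternMatch;
  // m_GAdd is commutative, so the constant may appear on either side.
  return mi_match(Dst, MRI, m_GAdd(m_GZExt(m_Reg(Src)), m_ICst(Cst)));
}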
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1488
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2042
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:654
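A small sketch of getOpcodeDef above: look through copies for a single G_TRUNC def of Reg and return its source; the helper is illustrative.
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
static llvm::Register getTruncSource(llvm::Register Reg,
                                     const llvm::MachineRegisterInfo &MRI) {
  if (llvm::MachineInstr *Trunc =
          llvm::getOpcodeDef(llvm::TargetOpcode::G_TRUNC, Reg, MRI))
    return Trunc->getOperand(1).getReg(); // operand 0 is the def, 1 the source
  return llvm::Register();                // invalid register if no such def
}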
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:462
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:295
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1448
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1613
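A minimal sketch of the constant-lookup helpers above: try a scalar G_CONSTANT first and fall back to a splat build vector; the wrapper itself is an illustrative assumption.
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <optional>
static std::optional<llvm::APInt>
getScalarOrSplatCst(llvm::Register Reg, const llvm::MachineRegisterInfo &MRI) {
  if (auto Cst = llvm::getIConstantVRegVal(Reg, MRI))
    return Cst;                                 // scalar G_CONSTANT
  return llvm::getIConstantSplatVal(Reg, MRI);  // splat build vector, if any
}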
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:742
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1571
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1595
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:495
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1628
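A hedged sketch of matchUnaryPredicate above: require every splat or per-element constant feeding Reg to be a non-zero integer (the predicate is illustrative).
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Casting.h"
static bool allElementsNonZero(const llvm::MachineRegisterInfo &MRI,
                               llvm::Register Reg) {
  return llvm::matchUnaryPredicate(MRI, Reg, [](const llvm::Constant *C) {
    const auto *CI = llvm::dyn_cast_or_null<llvm::ConstantInt>(C);
    return CI && !CI->isZero();
  });
}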
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1660
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:673
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
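A small sketch of the range helpers above, checking machine-operand properties without spelling out begin()/end(); the predicate is illustrative.
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstr.h"
static bool allUsesAreRegisters(const llvm::MachineInstr &MI) {
  return llvm::all_of(MI.uses(), [](const llvm::MachineOperand &MO) {
    return MO.isReg();
  });
}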
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
VReg must be defined by a G_CONSTANT; returns the corresponding value.
Definition Utils.cpp:306
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1551
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
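A minimal sketch relating the bit queries above: isPowerOf2_32 and has_single_bit agree for 32-bit values, and countr_one reports the width of a low mask.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
static void bitQueryExamples() {
  assert(llvm::isPowerOf2_32(64u) && llvm::has_single_bit(64u));
  assert(llvm::countr_one(0x0fu) == 4); // four trailing ones
}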
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:201
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1481
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:972
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:280
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:448
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1584
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1685
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:434
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value register, folding away any copies.
Definition Utils.cpp:470
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
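A small sketch of commonAlignment above: the alignment that still holds after stepping a known-aligned pointer by a byte offset (the values in the comment are only an example).
#include "llvm/Support/Alignment.h"
#include <cstdint>
static llvm::Align alignAfterOffset(llvm::Align Base, uint64_t Off) {
  // e.g. Base = Align(16), Off = 4  ->  Align(4)
  return llvm::commonAlignment(Base, Off);
}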
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:502
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
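A minimal sketch tying maskTrailingOnes to isMask_64 (documented further above): a mask of N trailing ones is exactly the shape isMask_64 accepts.
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
static void trailingMaskExample() {
  uint64_t Mask = llvm::maskTrailingOnes<uint64_t>(5); // 0b11111
  assert(Mask == 0x1f && llvm::isMask_64(Mask));
}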
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1466
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:231
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:264
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:255
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:261
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
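A hedged sketch of the KnownBits queries above: given the known bits of a value, decide whether it is provably a non-negative multiple of four (the helper is illustrative).
#include "llvm/Support/KnownBits.h"
static bool isNonNegativeMultipleOf4(const llvm::KnownBits &Known) {
  // Sign bit known zero, and at least two trailing bits known zero.
  return Known.isNonNegative() && Known.countMinTrailingZeros() >= 2;
}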
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
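A hedged sketch of the magic-number helpers above: for a udiv-by-constant rewrite, the struct returned by UnsignedDivisionByConstantInfo::get carries the multiplier and shift amounts that a combine then materialises as a multiply-high/shift sequence. The wrapper below only shows the call; the field names are left to DivisionByConstantInfo.h, and the zero divisor check is an illustrative precondition.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"
static llvm::UnsignedDivisionByConstantInfo
magicForUDiv(const llvm::APInt &Divisor) {
  // LeadingZeros may be passed when known zero bits of the dividend are available.
  return llvm::UnsignedDivisionByConstantInfo::get(Divisor, /*LeadingZeros=*/0);
}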