LLVM 23.0.0git
CombinerHelper.cpp
Go to the documentation of this file.
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
42#include <cmath>
43#include <optional>
44#include <tuple>
45
46#define DEBUG_TYPE "gi-combiner"
47
48using namespace llvm;
49using namespace MIPatternMatch;
50
51// Option to allow testing of the combiner while no targets know about indexed
52// addressing.
53static cl::opt<bool>
54 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
55 cl::desc("Force all indexed operations to be "
56 "legal for the GlobalISel combiner"));
57
62 const LegalizerInfo *LI)
63 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
65 TII(Builder.getMF().getSubtarget().getInstrInfo()),
66 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
67 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
68 (void)this->VT;
69}
70
72 return *Builder.getMF().getSubtarget().getTargetLowering();
73}
74
76 return Builder.getMF();
77}
78
82
83LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
84
/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// The position is \p I itself; \p ByteWidth is only used to check that \p I
/// is in range.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  // Keep ByteWidth "used" when assertions are compiled out.
  (void)ByteWidth;
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return I;
}
93
94/// Determines the LogBase2 value for a non-null input value using the
95/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
97 auto &MRI = *MIB.getMRI();
98 LLT Ty = MRI.getType(V);
99 auto Ctlz = MIB.buildCTLZ(Ty, V);
100 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
101 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
102}
103
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// The position is mirrored within the type: \p ByteWidth - \p I - 1.
/// \p I must be smaller than \p ByteWidth (asserted).
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return ByteWidth - I - 1;
}
112
113/// Given a map from byte offsets in memory to indices in a load/store,
114/// determine if that map corresponds to a little or big endian byte pattern.
115///
116/// \param MemOffset2Idx maps memory offsets to address offsets.
117/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
118///
119/// \returns true if the map corresponds to a big endian byte pattern, false if
120/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
121///
122/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
123/// are as follows:
124///
125/// AddrOffset Little endian Big endian
126/// 0 0 3
127/// 1 1 2
128/// 2 2 1
129/// 3 3 0
130static std::optional<bool>
132 int64_t LowestIdx) {
133 // Need at least two byte positions to decide on endianness.
134 unsigned Width = MemOffset2Idx.size();
135 if (Width < 2)
136 return std::nullopt;
137 bool BigEndian = true, LittleEndian = true;
138 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
139 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
140 if (MemOffsetAndIdx == MemOffset2Idx.end())
141 return std::nullopt;
142 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
143 assert(Idx >= 0 && "Expected non-negative byte offset?");
144 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
145 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
146 if (!BigEndian && !LittleEndian)
147 return std::nullopt;
148 }
149
150 assert((BigEndian != LittleEndian) &&
151 "Pattern cannot be both big and little endian!");
152 return BigEndian;
153}
154
156
157bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
158 assert(LI && "Must have LegalizerInfo to query isLegal!");
159 return LI->getAction(Query).Action == LegalizeActions::Legal;
160}
161
163 const LegalityQuery &Query) const {
164 return isPreLegalize() || isLegal(Query);
165}
166
168 return isLegal(Query) ||
169 LI->getAction(Query).Action == LegalizeActions::WidenScalar;
170}
171
173 const LegalityQuery &Query) const {
174 LegalizeAction Action = LI->getAction(Query).Action;
175 return Action == LegalizeActions::Legal ||
177}
178
180 if (!Ty.isVector())
181 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
182 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
183 if (isPreLegalize())
184 return true;
185 LLT EltTy = Ty.getElementType();
186 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
187 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
188}
189
191 Register ToReg) const {
192 Observer.changingAllUsesOfReg(MRI, FromReg);
193
194 if (MRI.constrainRegAttrs(ToReg, FromReg))
195 MRI.replaceRegWith(FromReg, ToReg);
196 else
197 Builder.buildCopy(FromReg, ToReg);
198
199 Observer.finishedChangingAllUsesOfReg();
200}
201
203 MachineOperand &FromRegOp,
204 Register ToReg) const {
205 assert(FromRegOp.getParent() && "Expected an operand in an MI");
206 Observer.changingInstr(*FromRegOp.getParent());
207
208 FromRegOp.setReg(ToReg);
209
210 Observer.changedInstr(*FromRegOp.getParent());
211}
212
214 unsigned ToOpcode) const {
215 Observer.changingInstr(FromMI);
216
217 FromMI.setDesc(Builder.getTII().get(ToOpcode));
218
219 Observer.changedInstr(FromMI);
220}
221
223 return RBI->getRegBank(Reg, MRI, *TRI);
224}
225
227 const RegisterBank *RegBank) const {
228 if (RegBank)
229 MRI.setRegBank(Reg, *RegBank);
230}
231
233 if (matchCombineCopy(MI)) {
235 return true;
236 }
237 return false;
238}
240 if (MI.getOpcode() != TargetOpcode::COPY)
241 return false;
242 Register DstReg = MI.getOperand(0).getReg();
243 Register SrcReg = MI.getOperand(1).getReg();
244 return canReplaceReg(DstReg, SrcReg, MRI);
245}
247 Register DstReg = MI.getOperand(0).getReg();
248 Register SrcReg = MI.getOperand(1).getReg();
249 replaceRegWith(MRI, DstReg, SrcReg);
250 MI.eraseFromParent();
251}
252
254 MachineInstr &MI, BuildFnTy &MatchInfo) const {
255 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
256 Register DstOp = MI.getOperand(0).getReg();
257 Register OrigOp = MI.getOperand(1).getReg();
258
259 if (!MRI.hasOneNonDBGUse(OrigOp))
260 return false;
261
262 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
263 // Even if only a single operand of the PHI is not guaranteed non-poison,
264 // moving freeze() backwards across a PHI can cause optimization issues for
265 // other users of that operand.
266 //
267 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
268 // the source register is unprofitable because it makes the freeze() more
269 // strict than is necessary (it would affect the whole register instead of
270 // just the subreg being frozen).
271 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
272 return false;
273
274 if (canCreateUndefOrPoison(OrigOp, MRI,
275 /*ConsiderFlagsAndMetadata=*/false))
276 return false;
277
278 std::optional<MachineOperand> MaybePoisonOperand;
279 for (MachineOperand &Operand : OrigDef->uses()) {
280 if (!Operand.isReg())
281 return false;
282
283 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
284 continue;
285
286 if (!MaybePoisonOperand)
287 MaybePoisonOperand = Operand;
288 else {
289 // We have more than one maybe-poison operand. Moving the freeze is
290 // unsafe.
291 return false;
292 }
293 }
294
295 // Eliminate freeze if all operands are guaranteed non-poison.
296 if (!MaybePoisonOperand) {
297 MatchInfo = [=](MachineIRBuilder &B) {
298 Observer.changingInstr(*OrigDef);
299 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
300 Observer.changedInstr(*OrigDef);
301 B.buildCopy(DstOp, OrigOp);
302 };
303 return true;
304 }
305
306 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
307 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
308
309 MatchInfo = [=](MachineIRBuilder &B) mutable {
310 Observer.changingInstr(*OrigDef);
311 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
312 Observer.changedInstr(*OrigDef);
313 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
314 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
316 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
317 Freeze.getReg(0));
318 replaceRegWith(MRI, DstOp, OrigOp);
319 };
320 return true;
321}
322
325 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
326 "Invalid instruction");
327 bool IsUndef = true;
328 MachineInstr *Undef = nullptr;
329
330 // Walk over all the operands of concat vectors and check if they are
331 // build_vector themselves or undef.
332 // Then collect their operands in Ops.
333 for (const MachineOperand &MO : MI.uses()) {
334 Register Reg = MO.getReg();
335 MachineInstr *Def = MRI.getVRegDef(Reg);
336 assert(Def && "Operand not defined");
337 if (!MRI.hasOneNonDBGUse(Reg))
338 return false;
339 switch (Def->getOpcode()) {
340 case TargetOpcode::G_BUILD_VECTOR:
341 IsUndef = false;
342 // Remember the operands of the build_vector to fold
343 // them into the yet-to-build flattened concat vectors.
344 for (const MachineOperand &BuildVecMO : Def->uses())
345 Ops.push_back(BuildVecMO.getReg());
346 break;
347 case TargetOpcode::G_IMPLICIT_DEF: {
348 LLT OpType = MRI.getType(Reg);
349 // Keep one undef value for all the undef operands.
350 if (!Undef) {
351 Builder.setInsertPt(*MI.getParent(), MI);
352 Undef = Builder.buildUndef(OpType.getScalarType());
353 }
354 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
355 OpType.getScalarType() &&
356 "All undefs should have the same type");
357 // Break the undef vector in as many scalar elements as needed
358 // for the flattening.
359 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
360 EltIdx != EltEnd; ++EltIdx)
361 Ops.push_back(Undef->getOperand(0).getReg());
362 break;
363 }
364 default:
365 return false;
366 }
367 }
368
369 // Check if the combine is illegal
370 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
372 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
373 return false;
374 }
375
376 if (IsUndef)
377 Ops.clear();
378
379 return true;
380}
383 // We determined that the concat_vectors can be flattened.
384 // Generate the flattened build_vector.
385 Register DstReg = MI.getOperand(0).getReg();
386 Builder.setInsertPt(*MI.getParent(), MI);
387 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
388
389 // Note: IsUndef is sort of redundant. We could have determined it by
390 // checking that all Ops are undef. Alternatively, we could have
391 // generated a build_vector of undefs and relied on another combine to
392 // clean that up. For now, given we already gather this information
393 // in matchCombineConcatVectors, just save compile time and issue the
394 // right thing.
395 if (Ops.empty())
396 Builder.buildUndef(NewDstReg);
397 else
398 Builder.buildBuildVector(NewDstReg, Ops);
399 replaceRegWith(MRI, DstReg, NewDstReg);
400 MI.eraseFromParent();
401}
402
405 auto &BV = cast<GBuildVector>(MI);
406
407 // Look at the first operand for a unmerge(bitcast) from a scalar type.
408 GUnmerge *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
409 if (!Unmerge || Unmerge->getReg(0) != BV.getSourceReg(0))
410 return false;
411 MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
412 if (BC->getOpcode() != TargetOpcode::G_BITCAST)
413 return false;
414 LLT InputTy = MRI.getType(BC->getOperand(1).getReg());
415 unsigned Factor = Unmerge->getNumDefs();
416 if (!InputTy.isScalar() || BV.getNumSources() % Factor != 0)
417 return false;
418
419 // Check if the build_vector is legal
420 LLT BVDstTy = LLT::fixed_vector(BV.getNumSources() / Factor, InputTy);
421 if (!isLegal({TargetOpcode::G_BUILD_VECTOR, {BVDstTy, InputTy}}))
422 return false;
423
424 // Check all other operands are bitcasts or undef.
425 for (unsigned Idx = 0; Idx < BV.getNumSources(); Idx += Factor) {
426 GUnmerge *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(Idx), MRI);
427 if (!all_of(iota_range<unsigned>(0, Factor, false), [&](unsigned J) {
428 MachineInstr *Src = MRI.getVRegDef(BV.getSourceReg(Idx + J));
429 if (Src->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
430 return true;
431 return Unmerge && BV.getSourceReg(Idx + J) == Unmerge->getReg(J);
432 }))
433 return false;
434 if (!Unmerge)
435 Ops.push_back(0);
436 else {
437 MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
438 if (BC->getOpcode() != TargetOpcode::G_BITCAST ||
439 MRI.getType(BC->getOperand(1).getReg()) != InputTy)
440 return false;
441 Ops.push_back(BC->getOperand(1).getReg());
442 }
443 }
444
445 return true;
446}
447
450 LLT SrcTy = MRI.getType(Ops[0]);
451 // Build undef if any operations require it.
452 Register Undef = 0;
453 for (Register &Op : Ops) {
454 if (!Op) {
455 if (!Undef)
456 Undef = Builder.buildUndef(SrcTy).getReg(0);
457 Op = Undef;
458 }
459 }
460
461 LLT BVDstTy = LLT::fixed_vector(Ops.size(), SrcTy);
462 auto BV = Builder.buildBuildVector(BVDstTy, Ops);
463 Builder.buildBitcast(MI.getOperand(0).getReg(), BV);
464 MI.eraseFromParent();
465}
466
468 auto &Shuffle = cast<GShuffleVector>(MI);
469
470 Register SrcVec1 = Shuffle.getSrc1Reg();
471 Register SrcVec2 = Shuffle.getSrc2Reg();
472 LLT EltTy = MRI.getType(SrcVec1).getElementType();
473 int Width = MRI.getType(SrcVec1).getNumElements();
474
475 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
476 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
477
478 SmallVector<Register> Extracts;
479 // Select only applicable elements from unmerged values.
480 for (int Val : Shuffle.getMask()) {
481 if (Val == -1)
482 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
483 else if (Val < Width)
484 Extracts.push_back(Unmerge1.getReg(Val));
485 else
486 Extracts.push_back(Unmerge2.getReg(Val - Width));
487 }
488 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
489 if (Extracts.size() == 1)
490 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
491 else
492 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
493 MI.eraseFromParent();
494}
495
498 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
499 auto ConcatMI1 =
500 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
501 auto ConcatMI2 =
502 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
503 if (!ConcatMI1 || !ConcatMI2)
504 return false;
505
506 // Check that the sources of the Concat instructions have the same type
507 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
508 MRI.getType(ConcatMI2->getSourceReg(0)))
509 return false;
510
511 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
512 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
513 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
514 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
515 // Check if the index takes a whole source register from G_CONCAT_VECTORS
516 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
517 if (Mask[i] == -1) {
518 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
519 if (i + j >= Mask.size())
520 return false;
521 if (Mask[i + j] != -1)
522 return false;
523 }
525 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
526 return false;
527 Ops.push_back(0);
528 } else if (Mask[i] % ConcatSrcNumElt == 0) {
529 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
530 if (i + j >= Mask.size())
531 return false;
532 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
533 return false;
534 }
535 // Retrieve the source register from its respective G_CONCAT_VECTORS
536 // instruction
537 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
538 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
539 } else {
540 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
541 ConcatMI1->getNumSources()));
542 }
543 } else {
544 return false;
545 }
546 }
547
549 {TargetOpcode::G_CONCAT_VECTORS,
550 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
551 return false;
552
553 return !Ops.empty();
554}
555
558 LLT SrcTy;
559 for (Register &Reg : Ops) {
560 if (Reg != 0)
561 SrcTy = MRI.getType(Reg);
562 }
563 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
564
565 Register UndefReg = 0;
566
567 for (Register &Reg : Ops) {
568 if (Reg == 0) {
569 if (UndefReg == 0)
570 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
571 Reg = UndefReg;
572 }
573 }
574
575 if (Ops.size() > 1)
576 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
577 else
578 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
579 MI.eraseFromParent();
580}
581
586 return true;
587 }
588 return false;
589}
590
593 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
594 "Invalid instruction kind");
595 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
596 Register Src1 = MI.getOperand(1).getReg();
597 LLT SrcType = MRI.getType(Src1);
598
599 unsigned DstNumElts = DstType.getNumElements();
600 unsigned SrcNumElts = SrcType.getNumElements();
601
602 // If the resulting vector is smaller than the size of the source
603 // vectors being concatenated, we won't be able to replace the
604 // shuffle vector into a concat_vectors.
605 //
606 // Note: We may still be able to produce a concat_vectors fed by
607 // extract_vector_elt and so on. It is less clear that would
608 // be better though, so don't bother for now.
609 //
610 // If the destination is a scalar, the size of the sources doesn't
611 // matter. We will lower the shuffle to a plain copy. This will
612 // work only if the source and destination have the same size. But
613 // that's covered by the next condition.
614 //
615 // TODO: If the size between the source and destination don't match
616 // we could still emit an extract vector element in that case.
617 if (DstNumElts < 2 * SrcNumElts)
618 return false;
619
620 // Check that the shuffle mask can be broken evenly between the
621 // different sources.
622 if (DstNumElts % SrcNumElts != 0)
623 return false;
624
625 // Mask length is a multiple of the source vector length.
626 // Check if the shuffle is some kind of concatenation of the input
627 // vectors.
628 unsigned NumConcat = DstNumElts / SrcNumElts;
629 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
630 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
631 for (unsigned i = 0; i != DstNumElts; ++i) {
632 int Idx = Mask[i];
633 // Undef value.
634 if (Idx < 0)
635 continue;
636 // Ensure the indices in each SrcType sized piece are sequential and that
637 // the same source is used for the whole piece.
638 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
639 (ConcatSrcs[i / SrcNumElts] >= 0 &&
640 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
641 return false;
642 // Remember which source this index came from.
643 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
644 }
645
646 // The shuffle is concatenating multiple vectors together.
647 // Collect the different operands for that.
648 Register UndefReg;
649 Register Src2 = MI.getOperand(2).getReg();
650 for (auto Src : ConcatSrcs) {
651 if (Src < 0) {
652 if (!UndefReg) {
653 Builder.setInsertPt(*MI.getParent(), MI);
654 UndefReg = Builder.buildUndef(SrcType).getReg(0);
655 }
656 Ops.push_back(UndefReg);
657 } else if (Src == 0)
658 Ops.push_back(Src1);
659 else
660 Ops.push_back(Src2);
661 }
662 return true;
663}
664
666 ArrayRef<Register> Ops) const {
667 Register DstReg = MI.getOperand(0).getReg();
668 Builder.setInsertPt(*MI.getParent(), MI);
669 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
670
671 if (Ops.size() == 1)
672 Builder.buildCopy(NewDstReg, Ops[0]);
673 else
674 Builder.buildMergeLikeInstr(NewDstReg, Ops);
675
676 replaceRegWith(MRI, DstReg, NewDstReg);
677 MI.eraseFromParent();
678}
679
680namespace {
681
682/// Select a preference between two uses. CurrentUse is the current preference
683/// while *ForCandidate is attributes of the candidate under consideration.
684PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
685 PreferredTuple &CurrentUse,
686 const LLT TyForCandidate,
687 unsigned OpcodeForCandidate,
688 MachineInstr *MIForCandidate) {
689 if (!CurrentUse.Ty.isValid()) {
690 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
691 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
692 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
693 return CurrentUse;
694 }
695
696 // We permit the extend to hoist through basic blocks but this is only
697 // sensible if the target has extending loads. If you end up lowering back
698 // into a load and extend during the legalizer then the end result is
699 // hoisting the extend up to the load.
700
701 // Prefer defined extensions to undefined extensions as these are more
702 // likely to reduce the number of instructions.
703 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
704 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
705 return CurrentUse;
706 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
707 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
708 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
709
710 // Prefer sign extensions to zero extensions as sign-extensions tend to be
711 // more expensive. Don't do this if the load is already a zero-extend load
712 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
713 // later.
714 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
715 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
716 OpcodeForCandidate == TargetOpcode::G_ZEXT)
717 return CurrentUse;
718 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
719 OpcodeForCandidate == TargetOpcode::G_SEXT)
720 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
721 }
722
723 // This is potentially target specific. We've chosen the largest type
724 // because G_TRUNC is usually free. One potential catch with this is that
725 // some targets have a reduced number of larger registers than smaller
726 // registers and this choice potentially increases the live-range for the
727 // larger value.
728 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
729 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
730 }
731 return CurrentUse;
732}
733
734/// Find a suitable place to insert some instructions and insert them. This
735/// function accounts for special cases like inserting before a PHI node.
736/// The current strategy for inserting before PHI's is to duplicate the
737/// instructions for each predecessor. However, while that's ok for G_TRUNC
738/// on most targets since it generally requires no code, other targets/cases may
739/// want to try harder to find a dominating block.
740static void InsertInsnsWithoutSideEffectsBeforeUse(
743 MachineOperand &UseMO)>
744 Inserter) {
745 MachineInstr &UseMI = *UseMO.getParent();
746
747 MachineBasicBlock *InsertBB = UseMI.getParent();
748
749 // If the use is a PHI then we want the predecessor block instead.
750 if (UseMI.isPHI()) {
751 MachineOperand *PredBB = std::next(&UseMO);
752 InsertBB = PredBB->getMBB();
753 }
754
755 // If the block is the same block as the def then we want to insert just after
756 // the def instead of at the start of the block.
757 if (InsertBB == DefMI.getParent()) {
759 Inserter(InsertBB, std::next(InsertPt), UseMO);
760 return;
761 }
762
763 // Otherwise we want the start of the BB
764 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
765}
766} // end anonymous namespace
767
769 PreferredTuple Preferred;
770 if (matchCombineExtendingLoads(MI, Preferred)) {
771 applyCombineExtendingLoads(MI, Preferred);
772 return true;
773 }
774 return false;
775}
776
777static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
778 unsigned CandidateLoadOpc;
779 switch (ExtOpc) {
780 case TargetOpcode::G_ANYEXT:
781 CandidateLoadOpc = TargetOpcode::G_LOAD;
782 break;
783 case TargetOpcode::G_SEXT:
784 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
785 break;
786 case TargetOpcode::G_ZEXT:
787 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
788 break;
789 default:
790 llvm_unreachable("Unexpected extend opc");
791 }
792 return CandidateLoadOpc;
793}
794
796 MachineInstr &MI, PreferredTuple &Preferred) const {
797 // We match the loads and follow the uses to the extend instead of matching
798 // the extends and following the def to the load. This is because the load
799 // must remain in the same position for correctness (unless we also add code
800 // to find a safe place to sink it) whereas the extend is freely movable.
801 // It also prevents us from duplicating the load for the volatile case or just
802 // for performance.
803 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
804 if (!LoadMI)
805 return false;
806
807 Register LoadReg = LoadMI->getDstReg();
808
809 LLT LoadValueTy = MRI.getType(LoadReg);
810 if (!LoadValueTy.isScalar())
811 return false;
812
813 // Most architectures are going to legalize <s8 loads into at least a 1 byte
814 // load, and the MMOs can only describe memory accesses in multiples of bytes.
815 // If we try to perform extload combining on those, we can end up with
816 // %a(s8) = extload %ptr (load 1 byte from %ptr)
817 // ... which is an illegal extload instruction.
818 if (LoadValueTy.getSizeInBits() < 8)
819 return false;
820
821 // For non power-of-2 types, they will very likely be legalized into multiple
822 // loads. Don't bother trying to match them into extending loads.
824 return false;
825
826 // Find the preferred type aside from the any-extends (unless it's the only
827 // one) and non-extending ops. We'll emit an extending load to that type and
828 // emit a variant of (extend (trunc X)) for the others according to the
829 // relative type sizes. At the same time, pick an extend to use based on the
830 // extend involved in the chosen type.
831 unsigned PreferredOpcode =
832 isa<GLoad>(&MI)
833 ? TargetOpcode::G_ANYEXT
834 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
835 Preferred = {LLT(), PreferredOpcode, nullptr};
836 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
837 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
838 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
839 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
840 const auto &MMO = LoadMI->getMMO();
841 // Don't do anything for atomics.
842 if (MMO.isAtomic())
843 continue;
844 // Check for legality.
845 if (!isPreLegalize()) {
846 LegalityQuery::MemDesc MMDesc(MMO);
847 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
848 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
849 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
850 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
851 .Action != LegalizeActions::Legal)
852 continue;
853 }
854 Preferred = ChoosePreferredUse(MI, Preferred,
855 MRI.getType(UseMI.getOperand(0).getReg()),
856 UseMI.getOpcode(), &UseMI);
857 }
858 }
859
860 // There were no extends
861 if (!Preferred.MI)
862 return false;
863 // It should be impossible to choose an extend without selecting a different
864 // type since by definition the result of an extend is larger.
865 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
866
867 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
868 return true;
869}
870
872 MachineInstr &MI, PreferredTuple &Preferred) const {
873 // Rewrite the load to the chosen extending load.
874 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
875
876 // Inserter to insert a truncate back to the original type at a given point
877 // with some basic CSE to limit truncate duplication to one per BB.
879 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
880 MachineBasicBlock::iterator InsertBefore,
881 MachineOperand &UseMO) {
882 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
883 if (PreviouslyEmitted) {
884 Observer.changingInstr(*UseMO.getParent());
885 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
886 Observer.changedInstr(*UseMO.getParent());
887 return;
888 }
889
890 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
891 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
892 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
893 EmittedInsns[InsertIntoBB] = NewMI;
894 replaceRegOpWith(MRI, UseMO, NewDstReg);
895 };
896
897 Observer.changingInstr(MI);
898 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
899 MI.setDesc(Builder.getTII().get(LoadOpc));
900
901 // Rewrite all the uses to fix up the types.
902 auto &LoadValue = MI.getOperand(0);
904 llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));
905
906 for (auto *UseMO : Uses) {
907 MachineInstr *UseMI = UseMO->getParent();
908
909 // If the extend is compatible with the preferred extend then we should fix
910 // up the type and extend so that it uses the preferred use.
911 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
912 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
913 Register UseDstReg = UseMI->getOperand(0).getReg();
914 MachineOperand &UseSrcMO = UseMI->getOperand(1);
915 const LLT UseDstTy = MRI.getType(UseDstReg);
916 if (UseDstReg != ChosenDstReg) {
917 if (Preferred.Ty == UseDstTy) {
918 // If the use has the same type as the preferred use, then merge
919 // the vregs and erase the extend. For example:
920 // %1:_(s8) = G_LOAD ...
921 // %2:_(s32) = G_SEXT %1(s8)
922 // %3:_(s32) = G_ANYEXT %1(s8)
923 // ... = ... %3(s32)
924 // rewrites to:
925 // %2:_(s32) = G_SEXTLOAD ...
926 // ... = ... %2(s32)
927 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
928 Observer.erasingInstr(*UseMO->getParent());
929 UseMO->getParent()->eraseFromParent();
930 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
931 // If the preferred size is smaller, then keep the extend but extend
932 // from the result of the extending load. For example:
933 // %1:_(s8) = G_LOAD ...
934 // %2:_(s32) = G_SEXT %1(s8)
935 // %3:_(s64) = G_ANYEXT %1(s8)
936 // ... = ... %3(s64)
937 /// rewrites to:
938 // %2:_(s32) = G_SEXTLOAD ...
939 // %3:_(s64) = G_ANYEXT %2:_(s32)
940 // ... = ... %3(s64)
941 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
942 } else {
943 // If the preferred size is large, then insert a truncate. For
944 // example:
945 // %1:_(s8) = G_LOAD ...
946 // %2:_(s64) = G_SEXT %1(s8)
947 // %3:_(s32) = G_ZEXT %1(s8)
948 // ... = ... %3(s32)
949 /// rewrites to:
950 // %2:_(s64) = G_SEXTLOAD ...
951 // %4:_(s8) = G_TRUNC %2:_(s64)
952 // %3:_(s32) = G_ZEXT %4:_(s8)
953 // ... = ... %3(s32)
954 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
955 InsertTruncAt);
956 }
957 continue;
958 }
959 // The use is (one of) the uses of the preferred use we chose earlier.
960 // We're going to update the load to def this value later so just erase
961 // the old extend.
962 Observer.erasingInstr(*UseMO->getParent());
963 UseMO->getParent()->eraseFromParent();
964 continue;
965 }
966
967 // The use isn't an extend. Truncate back to the type we originally loaded.
968 // This is free on many targets.
969 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
970 }
971
972 MI.getOperand(0).setReg(ChosenDstReg);
973 Observer.changedInstr(MI);
974}
975
977 BuildFnTy &MatchInfo) const {
     // Matcher: recognizes a scalar G_AND of a single-use load with a low-bit
     // mask constant and, on success, stores in MatchInfo a builder callback
     // that emits a G_ZEXTLOAD in place of the load. Returns false (leaving
     // MatchInfo untouched) whenever any precondition below fails.
978 assert(MI.getOpcode() == TargetOpcode::G_AND);
979
980 // If we have the following code:
981 // %mask = G_CONSTANT 255
982 // %ld = G_LOAD %ptr, (load s16)
983 // %and = G_AND %ld, %mask
984 //
985 // Try to fold it into
986 // %ld = G_ZEXTLOAD %ptr, (load s8)
987
988 Register Dst = MI.getOperand(0).getReg();
989 if (MRI.getType(Dst).isVector())
990 return false;
991
     // Only operand 2 is inspected for the constant; operand 1 must be the load.
992 auto MaybeMask =
993 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
994 if (!MaybeMask)
995 return false;
996
997 APInt MaskVal = MaybeMask->Value;
998
     // Require a contiguous run of ones starting at bit 0.
999 if (!MaskVal.isMask())
1000 return false;
1001
1002 Register SrcReg = MI.getOperand(1).getReg();
1003 // Don't use getOpcodeDef() here since intermediate instructions may have
1004 // multiple users.
1005 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
1006 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
1007 return false;
1008
1009 Register LoadReg = LoadMI->getDstReg();
1010 LLT RegTy = MRI.getType(LoadReg);
1011 Register PtrReg = LoadMI->getPointerReg();
1012 unsigned RegSize = RegTy.getSizeInBits();
1013 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
     // Number of trailing one bits, i.e. the width kept by the mask.
1014 unsigned MaskSizeBits = MaskVal.countr_one();
1015
1016 // The mask may not be larger than the in-memory type, as it might cover sign
1017 // extended bits.
1018 if (MaskSizeBits > LoadSizeBits.getValue())
1019 return false;
1020
1021 // If the mask covers the whole destination register, there's nothing to
1022 // extend.
1023 if (MaskSizeBits >= RegSize)
1024 return false;
1025
1026 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
1027 // at least byte loads. Avoid creating such loads here. Non-power-of-2
     // widths are rejected by the same check.
1028 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
1029 return false;
1030
1031 const MachineMemOperand &MMO = LoadMI->getMMO();
1032 LegalityQuery::MemDesc MemDesc(MMO);
1033
1034 // Don't modify the memory access size if this is atomic/volatile, but we can
1035 // still adjust the opcode to indicate the high bit behavior.
1036 if (LoadMI->isSimple())
1037 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
1038 else if (LoadSizeBits.getValue() > MaskSizeBits ||
1039 LoadSizeBits.getValue() == RegSize)
1040 return false;
1041
1042 // TODO: Could check if it's legal with the reduced or original memory size.
1044 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
1045 return false;
1046
     // The callback captures by copy ([=]); it builds the new load at the
     // position of the old one, writing directly to the G_AND's destination,
     // and then erases the old load.
1047 MatchInfo = [=](MachineIRBuilder &B) {
1048 B.setInstrAndDebugLoc(*LoadMI);
1049 auto &MF = B.getMF();
1050 auto PtrInfo = MMO.getPointerInfo();
1051 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
1052 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
1053 LoadMI->eraseFromParent();
1054 };
1055 return true;
1056}
1057
1059 const MachineInstr &UseMI) const {
     // Returns true if DefMI and UseMI are the same instruction, or if DefMI is
     // the first of the two found when scanning their shared basic block from
     // its beginning. Both instructions must be non-debug and in one block.
1060 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1061 "shouldn't consider debug uses");
1062 assert(DefMI.getParent() == UseMI.getParent());
1063 if (&DefMI == &UseMI)
1064 return true;
1065 const MachineBasicBlock &MBB = *DefMI.getParent();
     // Linear scan: stop at whichever of the two instructions comes first.
1066 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
1067 return &MI == &DefMI || &MI == &UseMI;
1068 });
1069 if (DefOrUse == MBB.end())
1070 llvm_unreachable("Block must contain both DefMI and UseMI!");
1071 return &*DefOrUse == &DefMI;
1072}
1073
1075 const MachineInstr &UseMI) const {
     // Dominance query between two non-debug instructions. Uses MDT when it is
     // set; otherwise answers false for instructions in different blocks and
     // falls back to the in-block ordering check of isPredecessor().
1076 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1077 "shouldn't consider debug uses");
1078 if (MDT)
1079 return MDT->dominates(&DefMI, &UseMI);
1080 else if (DefMI.getParent() != UseMI.getParent())
1081 return false;
1082
1083 return isPredecessor(DefMI, UseMI);
1084}
1085
     // Matcher for a scalar G_SEXT_INREG whose source is a G_SEXTLOAD (possibly
     // through one G_TRUNC) with a memory size equal to the G_SEXT_INREG width.
     // In that case the value is already sign-extended from that width.
1087 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1088 Register SrcReg = MI.getOperand(1).getReg();
1089 Register LoadUser = SrcReg;
1090
1091 if (MRI.getType(SrcReg).isVector())
1092 return false;
1093
     // If the source is a G_TRUNC, look at the truncated value instead.
     // TruncSrc stays invalid when no G_TRUNC was matched.
1094 Register TruncSrc;
1095 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1096 LoadUser = TruncSrc;
1097
     // Operand 2 is the immediate bit width to sign-extend from.
1098 uint64_t SizeInBits = MI.getOperand(2).getImm();
1099 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1100 // need any extend at all, just a truncate.
1101 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1102 // If truncating more than the original extended value, abort.
1103 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1104 if (TruncSrc &&
1105 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1106 return false;
1107 if (LoadSizeBits == SizeInBits)
1108 return true;
1109 }
1110 return false;
1111}
1112
     // Replaces the G_SEXT_INREG with a copy of its source operand into its
     // destination register, then erases the G_SEXT_INREG.
1114 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1115 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1116 MI.eraseFromParent();
1117}
1118
1120 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
     // Matcher for a scalar G_SEXT_INREG fed by a single-use G_LOAD that can be
     // turned into a G_SEXTLOAD. On success MatchInfo holds the load's
     // destination register and the memory width (in bits) for the new load.
1121 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1122
1123 Register DstReg = MI.getOperand(0).getReg();
1124 LLT RegTy = MRI.getType(DstReg);
1125
1126 // Only supports scalars for now.
1127 if (RegTy.isVector())
1128 return false;
1129
1130 Register SrcReg = MI.getOperand(1).getReg();
1131 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1132 if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
1133 return false;
1134
1135 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1136
1137 // If the sign extend extends from a narrower width than the load's width,
1138 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1139 // Avoid widening the load at all.
1140 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1141
1142 // Don't generate G_SEXTLOADs with a < 1 byte width.
1143 if (NewSizeBits < 8)
1144 return false;
1145 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1146 // anyway for most targets.
1147 if (!isPowerOf2_32(NewSizeBits))
1148 return false;
1149
1150 const MachineMemOperand &MMO = LoadDef->getMMO();
1151 LegalityQuery::MemDesc MMDesc(MMO);
1152
1153 // Don't modify the memory access size if this is atomic/volatile, but we can
1154 // still adjust the opcode to indicate the high bit behavior.
1155 if (LoadDef->isSimple())
1156 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1157 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1158 return false;
1159
1160 // TODO: Could check if it's legal with the reduced or original memory size.
1161 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1162 {MRI.getType(LoadDef->getDstReg()),
1163 MRI.getType(LoadDef->getPointerReg())},
1164 {MMDesc}}))
1165 return false;
1166
1167 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1168 return true;
1169}
1170
1172 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
     // Rewrites the matched G_LOAD + G_SEXT_INREG pair into one G_SEXTLOAD that
     // defines the G_SEXT_INREG's destination, then erases both originals.
     // MatchInfo is (load destination register, new memory width in bits).
1173 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1174 Register LoadReg;
1175 unsigned ScalarSizeBits;
1176 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1177 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1178
1179 // If we have the following:
1180 // %ld = G_LOAD %ptr, (load 2)
1181 // %ext = G_SEXT_INREG %ld, 8
1182 // ==>
1183 // %ld = G_SEXTLOAD %ptr (load 1)
1184
1185 auto &MMO = LoadDef->getMMO();
     // Insert the new load where the old load is, not at the G_SEXT_INREG.
1186 Builder.setInstrAndDebugLoc(*LoadDef);
1187 auto &MF = Builder.getMF();
1188 auto PtrInfo = MMO.getPointerInfo();
     // ScalarSizeBits / 8: presumably the new access size in bytes, matching the
     // "(load 1)" example above -- TODO confirm against getMachineMemOperand.
1189 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1190 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1191 LoadDef->getPointerReg(), *NewMMO);
1192 MI.eraseFromParent();
1193
1194 // Not all loads can be deleted, so make sure the old one is removed.
1195 LoadDef->eraseFromParent();
1196}
1197
1198/// Return true if 'MI' is a load or a store that may fold its address
1199/// operand into the load / store addressing mode.
///
/// The address must be defined by a G_PTR_ADD. A constant offset is modeled as
/// [reg + imm]; any other offset is modeled as [reg + reg] with scale 1. The
/// final answer comes from TLI.isLegalAddressingMode() for the access's memory
/// type and address space.
1201 MachineRegisterInfo &MRI) {
1203 auto *MF = MI->getMF();
1204 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1205 if (!Addr)
1206 return false;
1207
1208 AM.HasBaseReg = true;
1209 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1210 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1211 else
1212 AM.Scale = 1; // [reg +/- reg]
1213
1214 return TLI.isLegalAddressingMode(
1215 MF->getDataLayout(), AM,
1216 getTypeForLLT(MI->getMMO().getMemoryType(),
1217 MF->getFunction().getContext()),
1218 MI->getMMO().getAddrSpace());
1219}
1220
/// Map a plain load/store opcode (G_LOAD, G_STORE, G_ZEXTLOAD, G_SEXTLOAD) to
/// its G_INDEXED_* counterpart. Any other opcode hits llvm_unreachable.
1221static unsigned getIndexedOpc(unsigned LdStOpc) {
1222 switch (LdStOpc) {
1223 case TargetOpcode::G_LOAD:
1224 return TargetOpcode::G_INDEXED_LOAD;
1225 case TargetOpcode::G_STORE:
1226 return TargetOpcode::G_INDEXED_STORE;
1227 case TargetOpcode::G_ZEXTLOAD:
1228 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1229 case TargetOpcode::G_SEXTLOAD:
1230 return TargetOpcode::G_INDEXED_SEXTLOAD;
1231 default:
1232 llvm_unreachable("Unexpected opcode");
1233 }
1234}
1235
/// Return true if the indexed form of \p LdSt (see getIndexedOpc) is reported
/// legal by isLegal() for the operation's register types and memory type.
1236bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1237 // Check for legality.
1238 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1239 LLT Ty = MRI.getType(LdSt.getReg(0));
1240 LLT MemTy = LdSt.getMMO().getMemoryType();
1242 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1244 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
     // The type list passed to the query differs between the store form and
     // the load forms.
1245 SmallVector<LLT> OpTys;
1246 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1247 OpTys = {PtrTy, Ty, Ty};
1248 else
1249 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1250
1251 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1252 return isLegal(Q);
1253}
1254
1256 "post-index-use-threshold", cl::Hidden, cl::init(32),
     // Hidden command-line option, default 32. The description below is the
     // user-visible help text.
1257 cl::desc("Number of uses of a base pointer to check before it is no longer "
1258 "considered for post-indexing."));
1259
/// Search the users of \p LdSt's pointer for a G_PTR_ADD that could be folded
/// into \p LdSt as a post-indexed operation.
///
/// On success returns true and sets \p Addr to the G_PTR_ADD result, Base to
/// its base register, Offset to its offset register, and \p RematOffset to
/// true when the offset is a G_CONSTANT that does not dominate \p LdSt and so
/// must be rematerialized by the caller. Offset and \p RematOffset may be
/// overwritten even when the function returns false.
1260bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1262 bool &RematOffset) const {
1263 // We're looking for the following pattern, for either load or store:
1264 // %baseptr:_(p0) = ...
1265 // G_STORE %val(s64), %baseptr(p0)
1266 // %offset:_(s64) = G_CONSTANT i64 -256
1267 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1268 const auto &TLI = getTargetLowering();
1269
1270 Register Ptr = LdSt.getPointerReg();
1271 // If the store is the only use, don't bother.
1272 if (MRI.hasOneNonDBGUse(Ptr))
1273 return false;
1274
1275 if (!isIndexedLoadStoreLegal(LdSt))
1276 return false;
1277
     // Pointers defined by G_FRAME_INDEX are never candidates.
1278 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1279 return false;
1280
1281 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1282 auto *PtrDef = MRI.getVRegDef(Ptr);
1283
1284 unsigned NumUsesChecked = 0;
1285 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1286 if (++NumUsesChecked > PostIndexUseThreshold)
1287 return false; // Try to avoid exploding compile time.
1288
1289 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1290 // The use itself might be dead. This can happen during combines if DCE
1291 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1292 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1293 continue;
1294
1295 // Check the user of this isn't the store, otherwise we'd generate an
1296 // indexed store defining its own use.
1297 if (StoredValDef == &Use)
1298 continue;
1299
1300 Offset = PtrAdd->getOffsetReg();
1301 if (!ForceLegalIndexing &&
1302 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1303 /*IsPre*/ false, MRI))
1304 continue;
1305
1306 // Make sure the offset calculation is before the potentially indexed op.
1307 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1308 RematOffset = false;
1309 if (!dominates(*OffsetDef, LdSt)) {
1310 // If the offset however is just a G_CONSTANT, we can always just
1311 // rematerialize it where we need it.
1312 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1313 continue;
1314 RematOffset = true;
1315 }
1316
     // Inspect every other user of the base pointer; any of the conditions
     // below rejects the whole search (return false), not just this PtrAdd.
1317 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1318 if (&BasePtrUse == PtrDef)
1319 continue;
1320
1321 // If the user is a later load/store that can be post-indexed, then don't
1322 // combine this one.
1323 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1324 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1325 dominates(LdSt, *BasePtrLdSt) &&
1326 isIndexedLoadStoreLegal(*BasePtrLdSt))
1327 return false;
1328
1329 // Now we're looking for the key G_PTR_ADD instruction, which contains
1330 // the offset add that we want to fold.
1331 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1332 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1333 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1334 // If the use is in a different block, then we may produce worse code
1335 // due to the extra register pressure.
1336 if (BaseUseUse.getParent() != LdSt.getParent())
1337 return false;
1338
         // A user that can already fold this address into its own addressing
         // mode makes the transformation unprofitable.
1339 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1340 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1341 return false;
1342 }
1343 if (!dominates(LdSt, BasePtrUse))
1344 return false; // All uses must be dominated by the load/store.
1345 }
1346 }
1347
     // First PtrAdd that survives all checks wins.
1348 Addr = PtrAdd->getReg(0);
1349 Base = PtrAdd->getBaseReg();
1350 return true;
1351 }
1352
1353 return false;
1354}
1355
/// Check whether \p LdSt, whose address is a G_PTR_ADD, can become a
/// pre-indexed operation.
///
/// \p Addr is always set to \p LdSt's pointer register; \p Base and \p Offset
/// are bound by the G_PTR_ADD pattern match. Returns true only if every check
/// passes and at least one other user of \p Addr cannot fold the address into
/// its own addressing mode.
1356bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1357 Register &Base,
1358 Register &Offset) const {
1359 auto &MF = *LdSt.getParent()->getParent();
1360 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1361
1362 Addr = LdSt.getPointerReg();
     // Need a G_PTR_ADD address that has users besides this load/store.
1363 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1364 MRI.hasOneNonDBGUse(Addr))
1365 return false;
1366
1367 if (!ForceLegalIndexing &&
1368 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1369 return false;
1370
1371 if (!isIndexedLoadStoreLegal(LdSt))
1372 return false;
1373
1374 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1375 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1376 return false;
1377
1378 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1379 // Would require a copy.
1380 if (Base == St->getValueReg())
1381 return false;
1382
1383 // We're expecting one use of Addr in MI, but it could also be the
1384 // value stored, which isn't actually dominated by the instruction.
1385 if (St->getValueReg() == Addr)
1386 return false;
1387 }
1388
1389 // Avoid increasing cross-block register pressure.
1390 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1391 if (AddrUse.getParent() != LdSt.getParent())
1392 return false;
1393
1394 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1395 // That might allow us to end base's liveness here by adjusting the constant.
     // RealUse: some user of Addr needs the address value itself, i.e. it is
     // not a load/store that could fold the address computation.
1396 bool RealUse = false;
1397 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1398 if (!dominates(LdSt, AddrUse))
1399 return false; // All uses must be dominated by the load/store.
1400
1401 // If Ptr may be folded in addressing mode of other use, then it's
1402 // not profitable to do this transformation.
1403 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1404 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1405 RealUse = true;
1406 } else {
1407 RealUse = true;
1408 }
1409 }
1410 return RealUse;
1411}
1412
1414 MachineInstr &MI, BuildFnTy &MatchInfo) const {
     // Matcher for G_EXTRACT_VECTOR_ELT of a single-use, simple G_LOAD in the
     // same block. On success MatchInfo holds a callback that loads only the
     // extracted element as a scalar and erases the original vector load.
1415 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1416
1417 // Check if there is a load that defines the vector being extracted from.
1418 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1419 if (!LoadMI)
1420 return false;
1421
1422 Register Vector = MI.getOperand(1).getReg();
1423 LLT VecEltTy = MRI.getType(Vector).getElementType();
1424
1425 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1426
1427 // Checking whether we should reduce the load width.
1428 if (!MRI.hasOneNonDBGUse(Vector))
1429 return false;
1430
1431 // Check if the defining load is simple.
1432 if (!LoadMI->isSimple())
1433 return false;
1434
1435 // If the vector element type is not a multiple of a byte then we are unable
1436 // to correctly compute an address to load only the extracted element as a
1437 // scalar.
1438 if (!VecEltTy.isByteSized())
1439 return false;
1440
1441 // Check for load fold barriers between the extraction and the load.
1442 if (MI.getParent() != LoadMI->getParent())
1443 return false;
     // The scan starts at the load itself and gives up once more than MaxIter
     // instructions have been visited, which bounds the cost of the check.
1444 const unsigned MaxIter = 20;
1445 unsigned Iter = 0;
1446 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1447 if (II->isLoadFoldBarrier())
1448 return false;
1449 if (Iter++ == MaxIter)
1450 return false;
1451 }
1452
1453 // Check if the new load that we are going to create is legal
1454 // if we are in the post-legalization phase.
1455 MachineMemOperand MMO = LoadMI->getMMO();
1456 Align Alignment = MMO.getAlign();
1457 MachinePointerInfo PtrInfo;
1459
1460 // Finding the appropriate PtrInfo if offset is a known constant.
1461 // This is required to create the memory operand for the narrowed load.
1462 // This machine memory operand object helps us infer about legality
1463 // before we proceed to combine the instruction.
     // NOTE(review): the constant lookup below queries the vector register, not
     // the index operand (operand 2) -- confirm this is intended.
1464 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1465 int Elt = CVal->getZExtValue();
1466 // FIXME: should be (ABI size)*Elt.
1467 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1468 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1469 } else {
1470 // Discard the pointer info except the address space because the memory
1471 // operand can't represent this new access since the offset is variable.
1472 Offset = VecEltTy.getSizeInBits() / 8;
1474 }
1475
     // The narrowed access can only assume the alignment implied by Offset.
1476 Alignment = commonAlignment(Alignment, Offset);
1477
1478 Register VecPtr = LoadMI->getPointerReg();
1479 LLT PtrTy = MRI.getType(VecPtr);
1480
1481 MachineFunction &MF = *MI.getMF();
1482 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1483
1484 LegalityQuery::MemDesc MMDesc(*NewMMO);
1485
1487 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1488 return false;
1489
1490 // Load must be allowed and fast on the target.
1492 auto &DL = MF.getDataLayout();
1493 unsigned Fast = 0;
1494 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1495 &Fast) ||
1496 !Fast)
1497 return false;
1498
1499 Register Result = MI.getOperand(0).getReg();
1500 Register Index = MI.getOperand(2).getReg();
1501
1502 MatchInfo = [=](MachineIRBuilder &B) {
     // A throwaway observer is used here; the LegalizerHelper is only needed
     // for its element-address computation.
1503 GISelObserverWrapper DummyObserver;
1504 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1505 // Get pointer to the vector element.
1506 Register finalPtr = Helper.getVectorElementPointer(
1507 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1508 Index);
1509 // New G_LOAD instruction.
1510 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1511 // Remove original G_LOAD instruction.
1512 LoadMI->eraseFromParent();
1513 };
1514
1515 return true;
1516}
1517
1519 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
     // Matcher for turning a non-atomic load/store into an indexed one. The
     // pre-index form is tried first; the post-index search runs only if that
     // fails. MatchInfo.IsPre records which form matched.
1520 auto &LdSt = cast<GLoadStore>(MI);
1521
1522 if (LdSt.isAtomic())
1523 return false;
1524
1525 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1526 MatchInfo.Offset);
1527 if (!MatchInfo.IsPre &&
1528 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1529 MatchInfo.Offset, MatchInfo.RematOffset))
1530 return false;
1531
1532 return true;
1533}
1534
1536 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
     // Builds the indexed load/store described by MatchInfo, then erases both
     // the original memory instruction and the instruction that defined
     // MatchInfo.Addr (the new instruction now defines that register).
1537 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1538 unsigned Opcode = MI.getOpcode();
1539 bool IsStore = Opcode == TargetOpcode::G_STORE;
1540 unsigned NewOpcode = getIndexedOpc(Opcode);
1541
1542 // If the offset constant didn't happen to dominate the load/store, we can
1543 // just clone it as needed.
1544 if (MatchInfo.RematOffset) {
1545 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1546 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1547 *OldCst->getOperand(1).getCImm());
1548 MatchInfo.Offset = NewCst.getReg(0);
1549 }
1550
     // Operand order: a store defines only the written-back address and then
     // uses the stored value; a load defines the loaded value, then the address.
1551 auto MIB = Builder.buildInstr(NewOpcode);
1552 if (IsStore) {
1553 MIB.addDef(MatchInfo.Addr);
1554 MIB.addUse(MI.getOperand(0).getReg());
1555 } else {
1556 MIB.addDef(MI.getOperand(0).getReg());
1557 MIB.addDef(MatchInfo.Addr);
1558 }
1559
1560 MIB.addUse(MatchInfo.Base);
1561 MIB.addUse(MatchInfo.Offset);
     // Trailing immediate distinguishes pre-indexed (1) from post-indexed (0).
1562 MIB.addImm(MatchInfo.IsPre);
1563 MIB->cloneMemRefs(*MI.getMF(), MI);
1564 MI.eraseFromParent();
1565 AddrDef.eraseFromParent();
1566
1567 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
1568}
1569
1571 MachineInstr *&OtherMI) const {
     // Matcher: given a G_[SU]DIV or G_[SU]REM, look in the same block for the
     // complementary instruction of the same signedness with equal operands.
     // On success OtherMI points at that instruction.
1572 unsigned Opcode = MI.getOpcode();
1573 bool IsDiv, IsSigned;
1574
1575 switch (Opcode) {
1576 default:
1577 llvm_unreachable("Unexpected opcode!");
1578 case TargetOpcode::G_SDIV:
1579 case TargetOpcode::G_UDIV: {
1580 IsDiv = true;
1581 IsSigned = Opcode == TargetOpcode::G_SDIV;
1582 break;
1583 }
1584 case TargetOpcode::G_SREM:
1585 case TargetOpcode::G_UREM: {
1586 IsDiv = false;
1587 IsSigned = Opcode == TargetOpcode::G_SREM;
1588 break;
1589 }
1590 }
1591
1592 Register Src1 = MI.getOperand(1).getReg();
1593 unsigned DivOpcode, RemOpcode, DivremOpcode;
1594 if (IsSigned) {
1595 DivOpcode = TargetOpcode::G_SDIV;
1596 RemOpcode = TargetOpcode::G_SREM;
1597 DivremOpcode = TargetOpcode::G_SDIVREM;
1598 } else {
1599 DivOpcode = TargetOpcode::G_UDIV;
1600 RemOpcode = TargetOpcode::G_UREM;
1601 DivremOpcode = TargetOpcode::G_UDIVREM;
1602 }
1603
1604 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1605 return false;
1606
1607 // Combine:
1608 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1609 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1610 // into:
1611 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1612
1613 // Combine:
1614 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1615 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1616 // into:
1617 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1618
     // Candidates are found by walking the users of the first source operand;
     // the first user that satisfies all conditions is taken.
1619 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1620 if (MI.getParent() == UseMI.getParent() &&
1621 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1622 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1623 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1624 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1625 OtherMI = &UseMI;
1626 return true;
1627 }
1628 }
1629
1630 return false;
1631}
1632
1634 MachineInstr *&OtherMI) const {
     // Replaces MI and OtherMI (a matching div/rem pair) with one
     // G_SDIVREM/G_UDIVREM that defines both original destination registers.
1635 unsigned Opcode = MI.getOpcode();
1636 assert(OtherMI && "OtherMI shouldn't be empty.");
1637
     // Work out which of the two instructions supplies the quotient register
     // and which the remainder register.
1638 Register DestDivReg, DestRemReg;
1639 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1640 DestDivReg = MI.getOperand(0).getReg();
1641 DestRemReg = OtherMI->getOperand(0).getReg();
1642 } else {
1643 DestDivReg = OtherMI->getOperand(0).getReg();
1644 DestRemReg = MI.getOperand(0).getReg();
1645 }
1646
1647 bool IsSigned =
1648 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1649
1650 // Check which instruction is first in the block so we don't break def-use
1651 // deps by "moving" the instruction incorrectly. Also keep track of which
1652 // instruction is first so we pick its operands, avoiding use-before-def
1653 // bugs.
1654 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1655 Builder.setInstrAndDebugLoc(*FirstInst);
1656
1657 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1658 : TargetOpcode::G_UDIVREM,
1659 {DestDivReg, DestRemReg},
1660 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1661 MI.eraseFromParent();
1662 OtherMI->eraseFromParent();
1663}
1664
1666 MachineInstr &MI, MachineInstr *&BrCond) const {
     // Matcher for a G_BR immediately preceded by a G_BRCOND whose target is the
     // layout successor of this block. On success BrCond points at the G_BRCOND.
     // BrCond is also overwritten on some failing paths (any time the G_BR is
     // not the first instruction of the block).
1667 assert(MI.getOpcode() == TargetOpcode::G_BR);
1668
1669 // Try to match the following:
1670 // bb1:
1671 // G_BRCOND %c1, %bb2
1672 // G_BR %bb3
1673 // bb2:
1674 // ...
1675 // bb3:
1676
1677 // The above pattern does not have a fall through to the successor bb2, always
1678 // resulting in a branch no matter which path is taken. Here we try to find
1679 // and replace that pattern with conditional branch to bb3 and otherwise
1680 // fallthrough to bb2. This is generally better for branch predictors.
1681
1682 MachineBasicBlock *MBB = MI.getParent();
     // A G_BR that is the first instruction of its block has no predecessor
     // instruction to inspect.
1684 if (BrIt == MBB->begin())
1685 return false;
1686 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1687
1688 BrCond = &*std::prev(BrIt);
1689 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1690 return false;
1691
1692 // Check that the next block is the conditional branch target. Also make sure
1693 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1694 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1695 return BrCondTarget != MI.getOperand(0).getMBB() &&
1696 MBB->isLayoutSuccessor(BrCondTarget);
1697}
1698
1700 MachineInstr &MI, MachineInstr *&BrCond) const {
     // Inverts the G_BRCOND's condition (XOR with the target's "true" value) and
     // swaps the targets of the G_BRCOND and the G_BR, so the G_BR ends up
     // pointing at the G_BRCOND's former target.
1701 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
     // New instructions are inserted before the G_BRCOND.
1702 Builder.setInstrAndDebugLoc(*BrCond);
1703 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1704 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1705 // this to i1 only since we might not know for sure what kind of
1706 // compare generated the condition value.
1707 auto True = Builder.buildConstant(
1708 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1709 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1710
     // Read the G_BRCOND's old target before it is overwritten below.
1711 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1712 Observer.changingInstr(MI);
1713 MI.getOperand(0).setMBB(FallthroughBB);
1714 Observer.changedInstr(MI);
1715
1716 // Change the conditional branch to use the inverted condition and
1717 // new target block.
1718 Observer.changingInstr(*BrCond);
1719 BrCond->getOperand(0).setReg(Xor.getReg(0));
1720 BrCond->getOperand(1).setMBB(BrTarget);
1721 Observer.changedInstr(*BrCond);
1722}
1723
     // Delegates to LegalizerHelper::lowerMemcpyInline using a builder
     // positioned at MI and a throwaway observer; the boolean result is the
     // comparison of the helper's return value started on the last line.
1725 MachineIRBuilder HelperBuilder(MI);
1726 GISelObserverWrapper DummyObserver;
1727 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1728 return Helper.lowerMemcpyInline(MI) ==
1730}
1731
1733 unsigned MaxLen) const {
     // Delegates to LegalizerHelper::lowerMemCpyFamily, forwarding MaxLen, using
     // a builder positioned at MI and a throwaway observer; the boolean result
     // is the comparison of the helper's return value started on the last line.
1734 MachineIRBuilder HelperBuilder(MI);
1735 GISelObserverWrapper DummyObserver;
1736 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1737 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1739}
1740
1742 const MachineRegisterInfo &MRI,
1743 const APFloat &Val) {
     // Constant-folds the unary floating-point operation MI applied to Val and
     // returns the folded value. Opcodes not listed in the switch hit
     // llvm_unreachable.
1744 APFloat Result(Val);
1745 switch (MI.getOpcode()) {
1746 default:
1747 llvm_unreachable("Unexpected opcode!");
1748 case TargetOpcode::G_FNEG: {
1749 Result.changeSign();
1750 return Result;
1751 }
1752 case TargetOpcode::G_FABS: {
1753 Result.clearSign();
1754 return Result;
1755 }
     // The rounding family below differs only in the rounding mode passed to
     // roundToIntegral().
1756 case TargetOpcode::G_FCEIL:
1757 Result.roundToIntegral(APFloat::rmTowardPositive);
1758 return Result;
1759 case TargetOpcode::G_FFLOOR:
1760 Result.roundToIntegral(APFloat::rmTowardNegative);
1761 return Result;
1762 case TargetOpcode::G_INTRINSIC_TRUNC:
1763 Result.roundToIntegral(APFloat::rmTowardZero);
1764 return Result;
1765 case TargetOpcode::G_INTRINSIC_ROUND:
1766 Result.roundToIntegral(APFloat::rmNearestTiesToAway);
1767 return Result;
1768 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
1769 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1770 return Result;
1771 case TargetOpcode::G_FRINT:
1772 case TargetOpcode::G_FNEARBYINT:
1773 // Use default rounding mode (round to nearest, ties to even)
1774 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1775 return Result;
1776 case TargetOpcode::G_FPEXT:
1777 case TargetOpcode::G_FPTRUNC: {
1778 bool Unused;
1779 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1781 &Unused);
1782 return Result;
1783 }
     // G_FSQRT and G_FLOG2 are evaluated on the value converted to a host
     // double, using the host's sqrt()/log2().
1784 case TargetOpcode::G_FSQRT: {
1785 bool Unused;
1787 &Unused);
1788 Result = APFloat(sqrt(Result.convertToDouble()));
1789 break;
1790 }
1791 case TargetOpcode::G_FLOG2: {
1792 bool Unused;
1794 &Unused);
1795 Result = APFloat(log2(Result.convertToDouble()));
1796 break;
1797 }
1798 }
1799 // Convert the double-precision `APFloat` back to the semantics of the input
1800 // `Val`. Otherwise, `buildFConstant` will assert on size mismatch. Only
1801 // `G_FSQRT` and `G_FLOG2` reach here.
1802 bool Unused;
1803 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1804 return Result;
1805}
1806
1808 MachineInstr &MI, const ConstantFP *Cst) const {
     // Folds MI applied to the constant Cst via constantFoldFpUnary(), emits the
     // result as a floating-point constant defining MI's destination operand,
     // and erases MI.
1809 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1810 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1811 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1812 MI.eraseFromParent();
1813}
1814
1816 PtrAddChain &MatchInfo) const {
     // Matcher for two chained G_PTR_ADDs with constant offsets. On success
     // MatchInfo carries the summed immediate, the inner base register, the
     // register bank of the inner offset, and the flags to put on the result.
1817 // We're trying to match the following pattern:
1818 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1819 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1820 // -->
1821 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1822
1823 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1824 return false;
1825
     // Naming note: Add2/Imm1 refer to the outer (root) G_PTR_ADD's operands;
     // Imm2 below is the inner G_PTR_ADD's offset.
1826 Register Add2 = MI.getOperand(1).getReg();
1827 Register Imm1 = MI.getOperand(2).getReg();
1828 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1829 if (!MaybeImmVal)
1830 return false;
1831
1832 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1833 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1834 return false;
1835
1836 Register Base = Add2Def->getOperand(1).getReg();
1837 Register Imm2 = Add2Def->getOperand(2).getReg();
1838 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1839 if (!MaybeImm2Val)
1840 return false;
1841
1842 // Check if the new combined immediate forms an illegal addressing mode.
1843 // Do not combine if it was legal before but would get illegal.
1844 // To do so, we need to find a load/store user of the pointer to get
1845 // the access type.
1846 Type *AccessTy = nullptr;
1847 auto &MF = *MI.getMF();
     // Only the first load/store user found determines the access type.
1848 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1849 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1850 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1851 MF.getFunction().getContext());
1852 break;
1853 }
1854 }
1856 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1857 AMNew.BaseOffs = CombinedImm.getSExtValue();
     // Without a load/store user there is no access type, so the addressing
     // mode legality comparison is skipped.
1858 if (AccessTy) {
1859 AMNew.HasBaseReg = true;
1861 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1862 AMOld.HasBaseReg = true;
1863 unsigned AS = MRI.getType(Add2).getAddressSpace();
1864 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1865 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1866 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1867 return false;
1868 }
1869
1870 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1871 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1872 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1873 // largest signed integer that fits into the index type, which is the maximum
1874 // size of allocated objects according to the IR Language Reference.
1875 unsigned PtrAddFlags = MI.getFlags();
1876 unsigned LHSPtrAddFlags = Add2Def->getFlags();
     // A flag is kept only when both G_PTR_ADDs carry it.
1877 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1878 bool IsInBounds =
1879 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1880 unsigned Flags = 0;
1881 if (IsNoUWrap)
1883 if (IsInBounds) {
1886 }
1887
1888 // Pass the combined immediate to the apply function.
1889 MatchInfo.Imm = AMNew.BaseOffs;
1890 MatchInfo.Base = Base;
1891 MatchInfo.Bank = getRegBank(Imm2);
1892 MatchInfo.Flags = Flags;
1893 return true;
1894}
1895
1897 PtrAddChain &MatchInfo) const {
     // Rewrites the G_PTR_ADD in place: its base becomes MatchInfo.Base, its
     // offset becomes a new constant holding MatchInfo.Imm, and its flags are
     // replaced by MatchInfo.Flags.
1898 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1899 MachineIRBuilder MIB(MI);
1900 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1901 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
     // Give the new constant the register bank recorded by the matcher.
1902 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1903 Observer.changingInstr(MI);
1904 MI.getOperand(1).setReg(MatchInfo.Base);
1905 MI.getOperand(2).setReg(NewOffset.getReg(0));
1906 MI.setFlags(MatchInfo.Flags);
1907 Observer.changedInstr(MI);
1908}
1909
1911 RegisterImmPair &MatchInfo) const {
     // Matcher for two chained shifts of the same opcode with constant amounts.
     // On success MatchInfo.Reg is the inner shift's source and MatchInfo.Imm is
     // the sum of both shift amounts.
1912 // We're trying to match the following pattern with any of
1913 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1914 // %t1 = SHIFT %base, G_CONSTANT imm1
1915 // %root = SHIFT %t1, G_CONSTANT imm2
1916 // -->
1917 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1918
1919 unsigned Opcode = MI.getOpcode();
1920 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1921 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1922 Opcode == TargetOpcode::G_USHLSAT) &&
1923 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1924
1925 Register Shl2 = MI.getOperand(1).getReg();
1926 Register Imm1 = MI.getOperand(2).getReg();
1927 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1928 if (!MaybeImmVal)
1929 return false;
1930
     // NOTE(review): Shl2Def is dereferenced without a null check -- presumably
     // the register always has a unique def here; confirm.
1931 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1932 if (Shl2Def->getOpcode() != Opcode)
1933 return false;
1934
1935 Register Base = Shl2Def->getOperand(1).getReg();
1936 Register Imm2 = Shl2Def->getOperand(2).getReg();
1937 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1938 if (!MaybeImm2Val)
1939 return false;
1940
1941 // Pass the combined immediate to the apply function.
     // MatchInfo is written before the final bail-out below, so it can be
     // modified even when this matcher returns false.
1942 MatchInfo.Imm =
1943 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1944 MatchInfo.Reg = Base;
1945
1946 // There is no simple replacement for a saturating unsigned left shift that
1947 // exceeds the scalar size.
1948 if (Opcode == TargetOpcode::G_USHLSAT &&
1949 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1950 return false;
1951
1952 return true;
1953}
1954
// Apply half of the shift-by-immediate chain fold. Rewrites MI to shift
// MatchInfo.Reg by the combined amount MatchInfo.Imm; when that amount reaches
// the scalar width, logical shifts are replaced by a zero constant and the
// remaining opcodes are clamped to (width - 1).
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::applyShiftImmedChain(MachineInstr &MI, ...) --
// confirm.
1956 RegisterImmPair &MatchInfo) const {
1957 unsigned Opcode = MI.getOpcode();
1958 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1959 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1960 Opcode == TargetOpcode::G_USHLSAT) &&
1961 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1962
1963 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1964 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1965 auto Imm = MatchInfo.Imm;
1966
1967 if (Imm >= ScalarSizeInBits) {
1968 // Any logical shift that exceeds scalar size will produce zero.
1969 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1970 Builder.buildConstant(MI.getOperand(0), 0);
1971 MI.eraseFromParent();
1972 return;
1973 }
1974 // Arithmetic shift and saturating signed left shift have no effect beyond
1975 // scalar size.
1976 Imm = ScalarSizeInBits - 1;
1977 }
1978
// Materialize the (possibly clamped) amount in the type of the existing
// shift-amount operand, then retarget MI's two source operands in place.
1979 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1980 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1981 Observer.changingInstr(MI);
1982 MI.getOperand(1).setReg(MatchInfo.Reg);
1983 MI.getOperand(2).setReg(NewImm);
1984 Observer.changedInstr(MI);
1985}
1986
// Match half of the shift-of-shifted-logic fold. On success MatchInfo records
// the logic instruction (Logic), the inner shift (Shift2), the logic operand
// that is not the inner shift (LogicNonShiftReg) and the sum of both constant
// shift amounts (ValSum).
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::matchShiftOfShiftedLogic --
// confirm.
1988 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1989 // We're trying to match the following pattern with any of
1990 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1991 // with any of G_AND/G_OR/G_XOR logic instructions.
1992 // %t1 = SHIFT %X, G_CONSTANT C0
1993 // %t2 = LOGIC %t1, %Y
1994 // %root = SHIFT %t2, G_CONSTANT C1
1995 // -->
1996 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1997 // %t4 = SHIFT %Y, G_CONSTANT C1
1998 // %root = LOGIC %t3, %t4
1999 unsigned ShiftOpcode = MI.getOpcode();
2000 assert((ShiftOpcode == TargetOpcode::G_SHL ||
2001 ShiftOpcode == TargetOpcode::G_ASHR ||
2002 ShiftOpcode == TargetOpcode::G_LSHR ||
2003 ShiftOpcode == TargetOpcode::G_USHLSAT ||
2004 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
2005 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2006
2007 // Match a one-use bitwise logic op.
2008 Register LogicDest = MI.getOperand(1).getReg();
2009 if (!MRI.hasOneNonDBGUse(LogicDest))
2010 return false;
2011
2012 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
2013 unsigned LogicOpcode = LogicMI->getOpcode();
2014 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
2015 LogicOpcode != TargetOpcode::G_XOR)
2016 return false;
2017
2018 // Find a matching one-use shift by constant.
// A zero outer shift amount is rejected as well.
2019 const Register C1 = MI.getOperand(2).getReg();
2020 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
2021 if (!MaybeImmVal || MaybeImmVal->Value == 0)
2022 return false;
2023
2024 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
2025
// Helper: succeeds when MI is a single-use shift with the same opcode as the
// root and a constant amount, which is written to ShiftVal.
2026 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
2027 // Shift should match previous one and should be a one-use.
2028 if (MI->getOpcode() != ShiftOpcode ||
2029 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2030 return false;
2031
2032 // Must be a constant.
2033 auto MaybeImmVal =
2034 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
2035 if (!MaybeImmVal)
2036 return false;
2037
// NOTE(review): C0 is read sign-extended into a uint64_t while C1 above is
// read zero-extended -- confirm the asymmetry is intentional.
2038 ShiftVal = MaybeImmVal->Value.getSExtValue();
2039 return true;
2040 };
2041
2042 // Logic ops are commutative, so check each operand for a match.
2043 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
2044 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
2045 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
2046 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
// C0Val is only assigned by a successful matchFirstShift call; every path
// that reads it below has gone through one.
2047 uint64_t C0Val;
2048
2049 if (matchFirstShift(LogicMIOp1, C0Val)) {
2050 MatchInfo.LogicNonShiftReg = LogicMIReg2;
2051 MatchInfo.Shift2 = LogicMIOp1;
2052 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
2053 MatchInfo.LogicNonShiftReg = LogicMIReg1;
2054 MatchInfo.Shift2 = LogicMIOp2;
2055 } else
2056 return false;
2057
2058 MatchInfo.ValSum = C0Val + C1Val;
2059
2060 // The fold is not valid if the sum of the shift values exceeds bitwidth.
2061 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
2062 return false;
2063
2064 MatchInfo.Logic = LogicMI;
2065 return true;
2066}
2067
// Apply half of the shift-of-shifted-logic fold. Builds a shift of the inner
// shift's source by MatchInfo.ValSum, a shift of MatchInfo.LogicNonShiftReg by
// the root's original amount, and a logic op of the two results that defines
// the root's destination; then erases the inner shift, the logic op and MI.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::applyShiftOfShiftedLogic --
// confirm.
2069 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
2070 unsigned Opcode = MI.getOpcode();
2071 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
2072 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
2073 Opcode == TargetOpcode::G_SSHLSAT) &&
2074 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2075
// ShlType is the type of the shift-amount operand; DestType is the result type.
2076 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
2077 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
2078
2079 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
2080
2081 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
2082 Register Shift1 =
2083 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
2084
2085 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
2086 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
2087 // shift1 when building shift2. Erasing MatchInfo.Shift2 at the end would then
2088 // remove the old shift1 and cause a crash later, so erase it earlier to
2089 // avoid the crash.
2090 MatchInfo.Shift2->eraseFromParent();
2091
2092 Register Shift2Const = MI.getOperand(2).getReg();
2093 Register Shift2 = Builder
2094 .buildInstr(Opcode, {DestType},
2095 {MatchInfo.LogicNonShiftReg, Shift2Const})
2096 .getReg(0);
2097
2098 Register Dest = MI.getOperand(0).getReg();
2099 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2100
2101 // This was one use so it's safe to remove it.
2102 MatchInfo.Logic->eraseFromParent();
2103
2104 MI.eraseFromParent();
2105}
2106
// Match a G_SHL whose source is an add/or with a constant operand and record,
// in MatchInfo, a builder callback that shifts both operands separately and
// then recombines them with the original add/or opcode.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::matchCommuteShift(MachineInstr &MI, ...) --
// confirm.
2108 BuildFnTy &MatchInfo) const {
2109 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2110 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2111 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2112 auto &Shl = cast<GenericMachineInstr>(MI);
2113 Register DstReg = Shl.getReg(0);
2114 Register SrcReg = Shl.getReg(1);
2115 Register ShiftReg = Shl.getReg(2);
2116 Register X, C1;
2117
// The target gets a veto before any pattern matching is attempted.
2118 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2119 return false;
2120
// NOTE(review): one line of this pattern (listing line 2122) is missing from
// this view, so the mi_match expression below is incomplete as shown; the
// assert further down indicates the source is expected to be G_ADD or G_OR.
2121 if (!mi_match(SrcReg, MRI,
2123 m_GOr(m_Reg(X), m_Reg(C1))))))
2124 return false;
2125
// Both the logic/add operand C1 and the shift amount must be constants (or
// constant splats).
2126 APInt C1Val, C2Val;
2127 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2128 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2129 return false;
2130
2131 auto *SrcDef = MRI.getVRegDef(SrcReg);
2132 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2133 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2134 LLT SrcTy = MRI.getType(SrcReg);
// The callback captures by value; it emits (shl X), (shl C1) and then the
// original add/or defining DstReg.
2135 MatchInfo = [=](MachineIRBuilder &B) {
2136 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2137 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2138 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2139 };
2140 return true;
2141}
2142
// Match a G_LSHR by constant (MI) in combination with an inner shift ShiftMI
// that also shifts by a constant. On success MatchInfo holds the inner source,
// the summed shift amount and its type, the inner shift type, and optionally a
// mask request (Mask/MaskVal).
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::matchLshrOfTruncOfLshr(MachineInstr &MI, ...).
// How ShiftMI relates to MI (e.g. through a truncate) is established by the
// caller and is not shown here -- confirm.
2144 LshrOfTruncOfLshr &MatchInfo,
2145 MachineInstr &ShiftMI) const {
2146 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2147
2148 Register N0 = MI.getOperand(1).getReg();
2149 Register N1 = MI.getOperand(2).getReg();
2150 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2151
2152 APInt N1C, N001C;
2153 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2154 return false;
2155 auto N001 = ShiftMI.getOperand(2).getReg();
2156 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2157 return false;
2158
// Bring both shift amounts to a common bit width so they can be added.
2159 if (N001C.getBitWidth() > N1C.getBitWidth())
2160 N1C = N1C.zext(N001C.getBitWidth());
2161 else
2162 N001C = N001C.zext(N1C.getBitWidth());
2163
2164 Register InnerShift = ShiftMI.getOperand(0).getReg();
2165 LLT InnerShiftTy = MRI.getType(InnerShift);
2166 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
// Only proceed when the combined amount stays below the inner shift width.
2167 if ((N1C + N001C).ult(InnerShiftSize)) {
2168 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2169 MatchInfo.ShiftAmt = N1C + N001C;
2170 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2171 MatchInfo.InnerShiftTy = InnerShiftTy;
2172
// No mask is requested when the inner amount plus the outer operand width
// equals the inner width; otherwise a mask is requested, but only if both
// intermediate values have a single use.
2173 if ((N001C + OpSizeInBits) == InnerShiftSize)
2174 return true;
2175 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2176 MatchInfo.Mask = true;
2177 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2178 return true;
2179 }
2180 }
2181 return false;
2182}
2183
// Apply step paired with the lshr/trunc/lshr match: emits one G_LSHR of
// MatchInfo.Src by MatchInfo.ShiftAmt in the inner shift type, optionally ANDs
// it with a mask, truncates the result into MI's destination and erases MI.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::applyLshrOfTruncOfLshr --
// confirm.
2185 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
2186 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2187
2188 Register Dst = MI.getOperand(0).getReg();
2189 auto ShiftAmt =
2190 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2191 auto Shift =
2192 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2193 if (MatchInfo.Mask == true) {
// NOTE(review): the first line of this initializer (listing line 2195) is
// missing from this view, so how MaskVal is derived from
// MatchInfo.MaskVal.getZExtValue() cannot be confirmed here.
2194 APInt MaskVal =
2196 MatchInfo.MaskVal.getZExtValue());
2197 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2198 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2199 Builder.buildTrunc(Dst, And);
2200 } else
2201 Builder.buildTrunc(Dst, Shift);
2202 MI.eraseFromParent();
2203}
2204
// Match a G_MUL whose second source operand is a constant with an exact
// base-2 logarithm; that logarithm is returned through ShiftVal.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::matchCombineMulToShl(MachineInstr &MI, ...) --
// confirm.
2206 unsigned &ShiftVal) const {
2207 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2208 auto MaybeImmVal =
2209 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2210 if (!MaybeImmVal)
2211 return false;
2212
// ShiftVal is unsigned, so the result is compared against -1 through a signed
// cast; -1 appears to be the "no exact log2" sentinel of exactLogBase2 --
// confirm against the APInt documentation.
2213 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2214 return (static_cast<int32_t>(ShiftVal) != -1);
2215}
2216
// Apply step for the mul-to-shl fold: turns MI into a G_SHL in place, with a
// freshly built constant ShiftVal as the shift amount.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::applyCombineMulToShl(MachineInstr &MI, ...) --
// confirm.
2218 unsigned &ShiftVal) const {
2219 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2220 MachineIRBuilder MIB(MI);
// The shift-amount constant is built with the type of MI's result.
2221 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2222 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2223 Observer.changingInstr(MI);
2224 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2225 MI.getOperand(2).setReg(ShiftCst.getReg(0));
// NOTE(review): the statement guarded by this `if` (listing line 2227) is
// missing from this view; as shown, the condition would wrongly guard the
// changedInstr() call. Restore the missing line from the original file.
2226 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2228 Observer.changedInstr(MI);
2229}
2230
// Match a G_SUB with a constant right-hand side and record, in MatchInfo, a
// callback that rewrites it in place as a G_ADD of the negated constant.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::matchCombineSubToAdd(MachineInstr &MI, ...) --
// confirm. Several interior lines are missing as well (see notes below).
2232 BuildFnTy &MatchInfo) const {
2233 GSub &Sub = cast<GSub>(MI);
2234
2235 LLT Ty = MRI.getType(Sub.getReg(0));
2236
// The replacement G_ADD must be legal (or we must be before the legalizer).
2237 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2238 return false;
2239
// NOTE(review): the condition guarding this early return (listing line 2240)
// is missing from this view.
2241 return false;
2242
2243 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2244
// MI is captured by reference; everything else is captured by value.
2245 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2246 auto NegCst = B.buildConstant(Ty, -Imm);
2247 Observer.changingInstr(MI);
2248 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2249 MI.getOperand(2).setReg(NegCst.getReg(0));
// NOTE(review): listing lines 2250 and 2252 are missing from this view, so
// the statement guarded by the `if` below is not shown; as written the
// condition would wrongly guard the changedInstr() call.
2251 if (Imm.isMinSignedValue())
2253 Observer.changedInstr(MI);
2254 };
2255 return true;
2256}
2257
2258// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
// On success MatchData.Reg is the pre-extension source and MatchData.Imm is the
// constant shift amount.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, ...) --
// confirm.
2260 RegisterImmPair &MatchData) const {
2261 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2262 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2263 return false;
2264
2265 Register LHS = MI.getOperand(1).getReg();
2266
// Accept any of the three extension kinds on the shifted value.
2267 Register ExtSrc;
2268 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2269 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2270 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2271 return false;
2272
// The shift amount must be a constant or a constant splat vector.
2273 Register RHS = MI.getOperand(2).getReg();
2274 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2275 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2276 if (!MaybeShiftAmtVal)
2277 return false;
2278
// Legality is only checked when a LegalizerInfo is available.
2279 if (LI) {
2280 LLT SrcTy = MRI.getType(ExtSrc);
2281
2282 // We only really care about the legality with the shifted value. We can
2283 // pick any type the constant shift amount, so ask the target what to
2284 // use. Otherwise we would have to guess and hope it is reported as legal.
2285 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2286 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2287 return false;
2288 }
2289
// MatchData is filled in before the final check, so it is written even when
// the function ends up returning false.
2290 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2291 MatchData.Reg = ExtSrc;
2292 MatchData.Imm = ShiftAmt;
2293
// The narrow source needs at least ShiftAmt known-zero leading bits, and the
// amount must be smaller than the narrow scalar width.
2294 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2295 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2296 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2297}
2298
// Apply step for the shl-of-extend fold: shifts MatchData.Reg by the constant
// MatchData.Imm in the narrow type (keeping MI's flags), zero-extends the
// result into MI's destination and erases MI.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::applyCombineShlOfExtend --
// confirm.
2300 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2301 Register ExtSrcReg = MatchData.Reg;
2302 int64_t ShiftAmtVal = MatchData.Imm;
2303
// The shift-amount constant is built with the same type as the narrow source.
2304 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2305 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2306 auto NarrowShift =
2307 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2308 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2309 MI.eraseFromParent();
2310}
2311
// Match a merge whose sources are exactly the results of one unmerge, in the
// same order; on success MatchInfo is that unmerge's source register.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI, ...).
// The declaration of `Merge` (listing line 2314) is also missing -- confirm
// both against the original file.
2313 Register &MatchInfo) const {
2315 SmallVector<Register, 16> MergedValues;
2316 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2317 MergedValues.emplace_back(Merge.getSourceReg(I));
2318
// The first source must come from an unmerge with as many defs as the merge
// has sources.
2319 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2320 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2321 return false;
2322
// Every source must be the unmerge def at the same index.
2323 for (unsigned I = 0; I < MergedValues.size(); ++I)
2324 if (MergedValues[I] != Unmerge->getReg(I))
2325 return false;
2326
2327 MatchInfo = Unmerge->getSourceReg();
2328 return true;
2329}
2330
// Follows a chain of G_BITCASTs starting at Reg and returns the register at
// the bottom of the chain (Reg itself when it is not defined by a bitcast).
// NOTE(review): the first line of this signature (return type, name and the
// Reg parameter) is not visible in this view; presumably
// peekThroughBitcast(Register Reg, ...) -- confirm.
2332 const MachineRegisterInfo &MRI) {
// Each successful match rebinds Reg to the bitcast's source, so the empty
// loop body is intentional.
2333 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2334 ;
2335
2336 return Reg;
2337}
2338
// Match an unmerge whose source (looking through bitcasts) is a merge-like
// instruction with source elements of the same type or size as the unmerge
// results; on success the merge sources are appended to Operands.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably
// CombinerHelper::matchCombineUnmergeMergeToPlainValues -- confirm.
2340 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2341 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2342 "Expected an unmerge");
2343 auto &Unmerge = cast<GUnmerge>(MI);
2344 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2345
2346 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2347 if (!SrcInstr)
2348 return false;
2349
2350 // Check the source type of the merge.
2351 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2352 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2353 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2354 if (SrcMergeTy != Dst0Ty && !SameSize)
2355 return false;
2356 // They are the same now (modulo a bitcast).
2357 // We can collect all the src registers.
2358 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2359 Operands.push_back(SrcInstr->getSourceReg(Idx));
2360 return true;
2361}
2362
// Apply step for the unmerge-of-merge fold: replaces each def of the unmerge
// with the corresponding entry of Operands (directly when the types match,
// otherwise through a cast) and erases MI.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably
// CombinerHelper::applyCombineUnmergeMergeToPlainValues -- confirm.
2364 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2365 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2366 "Expected an unmerge");
2367 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2368 "Not enough operands to replace all defs");
// The last operand of the unmerge is its source; all others are defs.
2369 unsigned NumElems = MI.getNumOperands() - 1;
2370
// Only the first operand/def pair is inspected to decide whether a cast is
// needed for every element.
2371 LLT SrcTy = MRI.getType(Operands[0]);
2372 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2373 bool CanReuseInputDirectly = DstTy == SrcTy;
2374 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2375 Register DstReg = MI.getOperand(Idx).getReg();
2376 Register SrcReg = Operands[Idx];
2377
2378 // This combine may run after RegBankSelect, so we need to be aware of
2379 // register banks.
// When the destination has a class/bank that differs from the source's, a
// copy is inserted and given the destination's class/bank.
2380 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2381 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2382 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2383 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2384 }
2385
2386 if (CanReuseInputDirectly)
2387 replaceRegWith(MRI, DstReg, SrcReg);
2388 else
2389 Builder.buildCast(DstReg, SrcReg);
2390 }
2391 MI.eraseFromParent();
2392}
2393
// Match an unmerge whose source is defined by G_CONSTANT or G_FCONSTANT and
// split the constant's bits into one APInt per def, appended to Csts starting
// with the least significant piece.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::matchCombineUnmergeConstant
// -- confirm.
2395 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
// The source is the last operand; SrcIdx therefore also equals the def count.
2396 unsigned SrcIdx = MI.getNumOperands() - 1;
2397 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2398 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2399 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2400 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2401 return false;
2402 // Break down the big constant in smaller ones.
// Floating-point constants are handled through their raw bit pattern.
2403 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2404 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2405 ? CstVal.getCImm()->getValue()
2406 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2407
2408 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2409 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2410 // Unmerge a constant.
// Take the low ShiftAmt bits for the current def, then shift them out.
2411 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2412 Csts.emplace_back(Val.trunc(ShiftAmt));
2413 Val = Val.lshr(ShiftAmt);
2414 }
2415
2416 return true;
2417}
2418
// Apply step for the unmerge-of-constant fold: defines each result of the
// unmerge with the matching constant from Csts and erases MI.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::applyCombineUnmergeConstant
// -- confirm.
2420 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2421 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2422 "Expected an unmerge");
2423 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2424 "Not enough operands to replace all defs");
// All operands except the last one (the source) are defs.
2425 unsigned NumElems = MI.getNumOperands() - 1;
2426 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2427 Register DstReg = MI.getOperand(Idx).getReg();
2428 Builder.buildConstant(DstReg, Csts[Idx]);
2429 }
2430
2431 MI.eraseFromParent();
2432}
2433
// Match an unmerge whose source is an implicit def. MatchInfo is set to a
// callback that defines every result of the unmerge as undef.
// NOTE(review): the first lines of this signature (return type, name and the
// MI parameter) are not visible in this view; presumably
// CombinerHelper::matchCombineUnmergeUndef(MachineInstr &MI, ...) -- confirm.
2436 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2437 unsigned SrcIdx = MI.getNumOperands() - 1;
2438 Register SrcReg = MI.getOperand(SrcIdx).getReg();
// MatchInfo is assigned unconditionally, before the match result is known.
// The callback captures MI by reference, so MI must still be alive when the
// callback runs.
2439 MatchInfo = [&MI](MachineIRBuilder &B) {
2440 unsigned NumElems = MI.getNumOperands() - 1;
2441 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2442 Register DstReg = MI.getOperand(Idx).getReg();
2443 B.buildUndef(DstReg);
2444 }
2445 };
2446 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2447}
2448
// Match a scalar unmerge in which every def except the first has no
// non-debug uses. Vector destinations or sources are rejected.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably
// CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc -- confirm.
2450 MachineInstr &MI) const {
2451 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2452 "Expected an unmerge");
// Operand index getNumDefs() is the unmerge's source operand.
2453 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2454 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2455 return false;
2456 // Check that all the lanes are dead except the first one.
2457 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2458 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2459 return false;
2460 }
2461 return true;
2462}
2463
// Apply step for the dead-lanes fold: defines the unmerge's first result as a
// truncate of the unmerge's source and erases MI.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably
// CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc -- confirm.
2465 MachineInstr &MI) const {
// The source operand sits right after the defs.
2466 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2467 Register Dst0Reg = MI.getOperand(0).getReg();
2468 Builder.buildTrunc(Dst0Reg, SrcReg);
2469 MI.eraseFromParent();
2470}
2471
// Match a scalar unmerge whose source is a G_ZEXT of a value that fits
// entirely into the first result of the unmerge.
// NOTE(review): the signature line of this definition is not visible in this
// view; presumably CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr
// &MI) -- confirm.
2473 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2474 "Expected an unmerge");
2475 Register Dst0Reg = MI.getOperand(0).getReg();
2476 LLT Dst0Ty = MRI.getType(Dst0Reg);
2477 // G_ZEXT on vector applies to each lane, so it will
2478 // affect all destinations. Therefore we won't be able
2479 // to simplify the unmerge to just the first definition.
2480 if (Dst0Ty.isVector())
2481 return false;
// Operand index getNumDefs() is the unmerge's source operand.
2482 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2483 LLT SrcTy = MRI.getType(SrcReg);
2484 if (SrcTy.isVector())
2485 return false;
2486
2487 Register ZExtSrcReg;
2488 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2489 return false;
2490
2491 // Finally we can replace the first definition with
2492 // a zext of the source if the definition is big enough to hold
2493 // all of ZExtSrc bits.
2494 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2495 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2496}
2497
// Apply step for the unmerge-of-zext fold: the first result becomes the zext
// source (zero-extended again if it is narrower), every other result is
// replaced by a zero constant, and MI is erased.
// NOTE(review): the signature line of this definition is not visible in this
// view; presumably CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr
// &MI) -- confirm.
2499 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2500 "Expected an unmerge");
2501
2502 Register Dst0Reg = MI.getOperand(0).getReg();
2503
2504 MachineInstr *ZExtInstr =
2505 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2506 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2507 "Expecting a G_ZEXT");
2508
2509 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2510 LLT Dst0Ty = MRI.getType(Dst0Reg);
2511 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2512
// Wider destination: re-extend. Equal size: forward the register directly.
2513 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2514 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2515 } else {
2516 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2517 "ZExt src doesn't fit in destination");
2518 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2519 }
2520
// The zero constant is created lazily, once, and shared by all remaining defs.
2521 Register ZeroReg;
2522 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2523 if (!ZeroReg)
2524 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2525 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2526 }
2527 MI.eraseFromParent();
2528}
2529
// Match a scalar G_SHL/G_LSHR/G_ASHR that is wider than TargetShiftSize and
// shifts by a constant in the range [Size / 2, Size). The amount is returned
// through ShiftVal.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, ...)
// -- confirm.
2531 unsigned TargetShiftSize,
2532 unsigned &ShiftVal) const {
2533 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2534 MI.getOpcode() == TargetOpcode::G_LSHR ||
2535 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2536
2537 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2538 if (Ty.isVector()) // TODO: vector types are not handled yet.
2539 return false;
2540
2541 // Don't narrow further than the requested size.
2542 unsigned Size = Ty.getSizeInBits();
2543 if (Size <= TargetShiftSize)
2544 return false;
2545
2546 auto MaybeImmVal =
2547 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2548 if (!MaybeImmVal)
2549 return false;
2550
// ShiftVal is written even when the range check below fails.
2551 ShiftVal = MaybeImmVal->Value.getSExtValue();
2552 return ShiftVal >= Size / 2 && ShiftVal < Size;
2553}
2554
// Apply step for the wide-shift narrowing fold: splits the shifted value into
// two half-width pieces, performs the remaining shift (ShiftVal - HalfSize) on
// one half, and merges the result with a zero half (logical shifts) or a
// sign-fill half (arithmetic shift). MI is erased afterwards.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::applyCombineShiftToUnmerge
// -- confirm.
2556 MachineInstr &MI, const unsigned &ShiftVal) const {
2557 Register DstReg = MI.getOperand(0).getReg();
2558 Register SrcReg = MI.getOperand(1).getReg();
2559 LLT Ty = MRI.getType(SrcReg);
2560 unsigned Size = Ty.getSizeInBits();
2561 unsigned HalfSize = Size / 2;
2562 assert(ShiftVal >= HalfSize);
2563
2564 LLT HalfTy = LLT::scalar(HalfSize);
2565
// Unmerge.getReg(0) is the low half and getReg(1) the high half (see the
// "lo, hi = G_UNMERGE_VALUES x" notation used below).
2566 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2567 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2568
2569 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2570 Register Narrowed = Unmerge.getReg(1);
2571
2572 // dst = G_LSHR s64:x, C for C >= 32
2573 // =>
2574 // lo, hi = G_UNMERGE_VALUES x
2575 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2576
// A remaining amount of zero needs no shift at all.
2577 if (NarrowShiftAmt != 0) {
2578 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2579 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2580 }
2581
2582 auto Zero = Builder.buildConstant(HalfTy, 0);
2583 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2584 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2585 Register Narrowed = Unmerge.getReg(0);
2586 // dst = G_SHL s64:x, C for C >= 32
2587 // =>
2588 // lo, hi = G_UNMERGE_VALUES x
2589 // dst = G_MERGE_VALUES 0, (G_SHL lo, C - 32)
2590 if (NarrowShiftAmt != 0) {
2591 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2592 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2593 }
2594
2595 auto Zero = Builder.buildConstant(HalfTy, 0);
2596 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2597 } else {
2598 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
// Hi is the high half shifted right arithmetically by (HalfSize - 1); it is
// used as the new high half in every G_ASHR case below.
2599 auto Hi = Builder.buildAShr(
2600 HalfTy, Unmerge.getReg(1),
2601 Builder.buildConstant(HalfTy, HalfSize - 1));
2602
2603 if (ShiftVal == HalfSize) {
2604 // (G_ASHR i64:x, 32) ->
2605 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2606 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2607 } else if (ShiftVal == Size - 1) {
2608 // Don't need a second shift.
2609 // (G_ASHR i64:x, 63) ->
2610 // %narrowed = (G_ASHR hi_32(x), 31)
2611 // G_MERGE_VALUES %narrowed, %narrowed
2612 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2613 } else {
2614 auto Lo = Builder.buildAShr(
2615 HalfTy, Unmerge.getReg(1),
2616 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2617
2618 // (G_ASHR i64:x, C) ->, for C >= 32
2619 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2620 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2621 }
2622 }
2623
2624 MI.eraseFromParent();
2625}
2626
// Convenience wrapper: runs matchCombineShiftToUnmerge and, when it succeeds,
// applyCombineShiftToUnmerge. Returns whether the combine was applied.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::tryCombineShiftToUnmerge --
// confirm.
2628 MachineInstr &MI, unsigned TargetShiftAmount) const {
// ShiftAmt is an out-parameter of the match and only read on success.
2629 unsigned ShiftAmt;
2630 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2631 applyCombineShiftToUnmerge(MI, ShiftAmt);
2632 return true;
2633 }
2634
2635 return false;
2636}
2637
// Match a G_INTTOPTR whose source is a G_PTRTOINT of a register with exactly
// the G_INTTOPTR's result type; that register is returned through Reg.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, ...) --
// confirm.
2639 Register &Reg) const {
2640 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2641 Register DstReg = MI.getOperand(0).getReg();
2642 LLT DstTy = MRI.getType(DstReg);
2643 Register SrcReg = MI.getOperand(1).getReg();
2644 return mi_match(SrcReg, MRI,
2645 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2646}
2647
// Apply step for the inttoptr(ptrtoint x) fold: defines MI's destination as a
// copy of Reg and erases MI.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, ...) --
// confirm.
2649 Register &Reg) const {
2650 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2651 Register DstReg = MI.getOperand(0).getReg();
2652 Builder.buildCopy(DstReg, Reg);
2653 MI.eraseFromParent();
2654}
2655
// Apply step for a G_PTRTOINT fold: defines MI's destination as a
// zero-extension or truncation of Reg (whichever the sizes require) and erases
// MI.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, ...) --
// confirm.
2657 Register &Reg) const {
2658 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2659 Register DstReg = MI.getOperand(0).getReg();
2660 Builder.buildZExtOrTrunc(DstReg, Reg);
2661 MI.eraseFromParent();
2662}
2663
// Match a G_ADD in which one operand is a G_PTRTOINT of a pointer whose scalar
// size equals the integer's. On success PtrReg.first is the pointer register
// and PtrReg.second tells whether the pointer came from the RHS (so the
// operands must be commuted).
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::matchCombineAddP2IToPtrAdd
// -- confirm.
2665 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2666 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2667 Register LHS = MI.getOperand(1).getReg();
2668 Register RHS = MI.getOperand(2).getReg();
2669 LLT IntTy = MRI.getType(LHS);
2670
2671 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2672 // instruction.
2673 PtrReg.second = false;
2674 for (Register SrcReg : {LHS, RHS}) {
2675 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2676 // Don't handle cases where the integer is implicitly converted to the
2677 // pointer width.
2678 LLT PtrTy = MRI.getType(PtrReg.first);
2679 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2680 return true;
2681 }
2682
// The first iteration (LHS) did not match, so a later match needs a commute.
2683 PtrReg.second = true;
2684 }
2685
2686 return false;
2687}
2688
// Apply step for the add-of-ptrtoint fold: builds a G_PTR_ADD of the matched
// pointer and the other G_ADD operand, converts the result back to an integer
// into MI's destination, and erases MI.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::applyCombineAddP2IToPtrAdd
// -- confirm.
2690 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2691 Register Dst = MI.getOperand(0).getReg();
2692 Register LHS = MI.getOperand(1).getReg();
2693 Register RHS = MI.getOperand(2).getReg();
2694
// After the optional swap, RHS is the integer offset; LHS is then replaced by
// the pointer register itself.
2695 const bool DoCommute = PtrReg.second;
2696 if (DoCommute)
2697 std::swap(LHS, RHS);
2698 LHS = PtrReg.first;
2699
2700 LLT PtrTy = MRI.getType(LHS);
2701
2702 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2703 Builder.buildPtrToInt(Dst, PtrAdd);
2704 MI.eraseFromParent();
2705}
2706
// Match a G_PTR_ADD whose base is a G_INTTOPTR of a constant and whose offset
// is a constant; the folded address is returned through NewCst at the width of
// the G_PTR_ADD's result type.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
// ...) -- confirm.
2708 APInt &NewCst) const {
2709 auto &PtrAdd = cast<GPtrAdd>(MI);
2710 Register LHS = PtrAdd.getBaseReg();
2711 Register RHS = PtrAdd.getOffsetReg();
// This local shadows any outer `MRI` with the one obtained from Builder.
2712 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2713
2714 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2715 APInt Cst;
2716 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2717 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2718 // G_INTTOPTR uses zero-extension
// The offset, in contrast, is sign-extended (or truncated) before the add.
2719 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2720 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2721 return true;
2722 }
2723 }
2724
2725 return false;
2726}
2727
// Apply step for the constant G_PTR_ADD fold: defines the G_PTR_ADD's result
// as the constant NewCst and erases the G_PTR_ADD.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
// ...) -- confirm.
2729 APInt &NewCst) const {
2730 auto &PtrAdd = cast<GPtrAdd>(MI);
2731 Register Dst = PtrAdd.getReg(0);
2732
2733 Builder.buildConstant(Dst, NewCst);
2734 PtrAdd.eraseFromParent();
2735}
2736
// Match a G_ANYEXT whose source (ignoring copies) is a G_TRUNC of a register
// that already has the G_ANYEXT's result type and can replace its destination;
// that register is returned through Reg.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, ...) --
// confirm.
2738 Register &Reg) const {
2739 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2740 Register DstReg = MI.getOperand(0).getReg();
2741 Register SrcReg = MI.getOperand(1).getReg();
// Fall back to the direct source when no valid copy-free register is found.
2742 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2743 if (OriginalSrcReg.isValid())
2744 SrcReg = OriginalSrcReg;
2745 LLT DstTy = MRI.getType(DstReg);
2746 return mi_match(SrcReg, MRI,
2747 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2748 canReplaceReg(DstReg, Reg, MRI);
2749}
2750
// Match a G_ZEXT of a G_TRUNC of a register that already has the G_ZEXT's
// result type, provided the bits removed by the truncate are known to be zero;
// that register is returned through Reg.
// NOTE(review): the first line of this signature is not visible in this view;
// presumably CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, ...) --
// confirm.
2752 Register &Reg) const {
2753 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2754 Register DstReg = MI.getOperand(0).getReg();
2755 Register SrcReg = MI.getOperand(1).getReg();
2756 LLT DstTy = MRI.getType(DstReg);
2757 if (mi_match(SrcReg, MRI,
2758 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2759 canReplaceReg(DstReg, Reg, MRI)) {
// Reg must have at least (DstSize - SrcSize) known leading zero bits, i.e.
// every bit the truncate discarded.
2760 unsigned DstSize = DstTy.getScalarSizeInBits();
2761 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2762 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2763 }
2764 return false;
2765}
2766
// Picks the intermediate type for narrowing a right shift that feeds a
// truncate: a 32-bit element variant of ShiftTy when ShiftTy is wider than 32
// bits and TruncTy is narrower than 32 bits, otherwise ShiftTy unchanged
// (meaning "do not combine").
// NOTE(review): the signature line of this definition is not visible in this
// view; presumably getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
// -- confirm.
2768 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2769 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2770
2771 // ShiftTy > 32 > TruncTy -> 32
2772 if (ShiftSize > 32 && TruncSize < 32)
2773 return ShiftTy.changeElementSize(32);
2774
2775 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2776 // Some targets like it, some don't, some only like it under certain
2777 // conditions/processor versions, etc.
2778 // A TL hook might be needed for this.
2779
2780 // Don't combine
2781 return ShiftTy;
2782}
2783
// Match a G_TRUNC of a single-use shift that can be performed in a narrower
// type. On success MatchInfo holds the shift instruction and the type the new
// shift should use.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::matchCombineTruncOfShift --
// confirm.
2785 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2786 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2787 Register DstReg = MI.getOperand(0).getReg();
2788 Register SrcReg = MI.getOperand(1).getReg();
2789
2790 if (!MRI.hasOneNonDBGUse(SrcReg))
2791 return false;
2792
2793 LLT SrcTy = MRI.getType(SrcReg);
2794 LLT DstTy = MRI.getType(DstReg);
2795
2796 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2797 const auto &TL = getTargetLowering();
2798
2799 LLT NewShiftTy;
2800 switch (SrcMI->getOpcode()) {
2801 default:
2802 return false;
2803 case TargetOpcode::G_SHL: {
// Left shifts are narrowed all the way to the truncate's result type.
2804 NewShiftTy = DstTy;
2805
2806 // Make sure new shift amount is legal.
2807 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2808 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2809 return false;
2810 break;
2811 }
2812 case TargetOpcode::G_LSHR:
2813 case TargetOpcode::G_ASHR: {
2814 // For right shifts, we conservatively do not do the transform if the TRUNC
2815 // has any STORE users. The reason is that if we change the type of the
2816 // shift, we may break the truncstore combine.
2817 //
2818 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2819 for (auto &User : MRI.use_instructions(DstReg))
2820 if (User.getOpcode() == TargetOpcode::G_STORE)
2821 return false;
2822
// An unchanged type signals that no profitable intermediate type exists.
2823 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2824 if (NewShiftTy == SrcTy)
2825 return false;
2826
2827 // Make sure we won't lose information by truncating the high bits.
2828 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2829 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2830 DstTy.getScalarSizeInBits()))
2831 return false;
2832 break;
2833 }
2834 }
2835
// NOTE(review): the opening line of this check (listing line 2836) is missing
// from this view; the visible arguments describe a query for the shift opcode
// on {NewShiftTy, preferred shift-amount type}, presumably a legality check --
// confirm.
2837 {SrcMI->getOpcode(),
2838 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2839 return false;
2840
2841 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2842 return true;
2843}
2844
// Apply step for the trunc-of-shift fold: truncates the shift's source to the
// new shift type, rebuilds the shift there with the original amount register,
// and either forwards the result or truncates it again into MI's destination.
// NOTE(review): the first line of this signature (return type and name) is not
// visible in this view; presumably CombinerHelper::applyCombineTruncOfShift --
// confirm.
2846 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2847 MachineInstr *ShiftMI = MatchInfo.first;
2848 LLT NewShiftTy = MatchInfo.second;
2849
2850 Register Dst = MI.getOperand(0).getReg();
2851 LLT DstTy = MRI.getType(Dst);
2852
2853 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2854 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2855 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2856
2857 Register NewShift =
2858 Builder
2859 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2860 .getReg(0);
2861
// No second truncate is needed when the new shift already has the final type.
2862 if (NewShiftTy == DstTy)
2863 replaceRegWith(MRI, Dst, NewShift);
2864 else
2865 Builder.buildTrunc(Dst, NewShift);
2866
2867 eraseInst(MI);
2868}
2869
2871 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2872 return MO.isReg() &&
2873 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2874 });
2875}
2876
2878 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2879 return !MO.isReg() ||
2880 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2881 });
2882}
2883
2885 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2886 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2887 return all_of(Mask, [](int Elt) { return Elt < 0; });
2888}
2889
2891 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2892 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2893 MRI);
2894}
2895
2897 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2898 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2899 MRI);
2900}
2901
2903 MachineInstr &MI) const {
2904 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2905 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2906 "Expected an insert/extract element op");
2907 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2908 if (VecTy.isScalableVector())
2909 return false;
2910
2911 unsigned IdxIdx =
2912 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2913 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2914 if (!Idx)
2915 return false;
2916 return Idx->getZExtValue() >= VecTy.getNumElements();
2917}
2918
2920 unsigned &OpIdx) const {
2921 GSelect &SelMI = cast<GSelect>(MI);
2922 auto Cst =
2923 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2924 if (!Cst)
2925 return false;
2926 OpIdx = Cst->isZero() ? 3 : 2;
2927 return true;
2928}
2929
2930void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2931
2933 const MachineOperand &MOP2) const {
2934 if (!MOP1.isReg() || !MOP2.isReg())
2935 return false;
2936 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2937 if (!InstAndDef1)
2938 return false;
2939 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2940 if (!InstAndDef2)
2941 return false;
2942 MachineInstr *I1 = InstAndDef1->MI;
2943 MachineInstr *I2 = InstAndDef2->MI;
2944
2945 // Handle a case like this:
2946 //
2947 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2948 //
2949 // Even though %0 and %1 are produced by the same instruction they are not
2950 // the same values.
2951 if (I1 == I2)
2952 return MOP1.getReg() == MOP2.getReg();
2953
2954 // If we have an instruction which loads or stores, we can't guarantee that
2955 // it is identical.
2956 //
2957 // For example, we may have
2958 //
2959 // %x1 = G_LOAD %addr (load N from @somewhere)
2960 // ...
2961 // call @foo
2962 // ...
2963 // %x2 = G_LOAD %addr (load N from @somewhere)
2964 // ...
2965 // %or = G_OR %x1, %x2
2966 //
2967 // It's possible that @foo will modify whatever lives at the address we're
2968 // loading from. To be safe, let's just assume that all loads and stores
2969 // are different (unless we have something which is guaranteed to not
2970 // change.)
2971 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2972 return false;
2973
2974 // If both instructions are loads or stores, they are equal only if both
2975 // are dereferenceable invariant loads with the same number of bits.
2976 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2979 if (!LS1 || !LS2)
2980 return false;
2981
2982 if (!I2->isDereferenceableInvariantLoad() ||
2983 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2984 return false;
2985 }
2986
2987 // Check for physical registers on the instructions first to avoid cases
2988 // like this:
2989 //
2990 // %a = COPY $physreg
2991 // ...
2992 // SOMETHING implicit-def $physreg
2993 // ...
2994 // %b = COPY $physreg
2995 //
2996 // These copies are not equivalent.
2997 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2998 return MO.isReg() && MO.getReg().isPhysical();
2999 })) {
3000 // Check if we have a case like this:
3001 //
3002 // %a = COPY $physreg
3003 // %b = COPY %a
3004 //
3005 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
3006 // From that, we know that they must have the same value, since they must
3007 // have come from the same COPY.
3008 return I1->isIdenticalTo(*I2);
3009 }
3010
3011 // We don't have any physical registers, so we don't necessarily need the
3012 // same vreg defs.
3013 //
3014 // On the off-chance that there's some target instruction feeding into the
3015 // instruction, let's use produceSameValue instead of isIdenticalTo.
3016 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
3017 // Handle instructions with multiple defs that produce same values. Values
3018 // are same for operands with same index.
3019 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
3020 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
3021 // I1 and I2 are different instructions but produce same values,
3022 // %1 and %6 are same, %1 and %7 are not the same value.
3023 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
3024 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
3025 }
3026 return false;
3027}
3028
3030 int64_t C) const {
3031 if (!MOP.isReg())
3032 return false;
3033 auto *MI = MRI.getVRegDef(MOP.getReg());
3034 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
3035 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
3036 MaybeCst->getSExtValue() == C;
3037}
3038
3040 double C) const {
3041 if (!MOP.isReg())
3042 return false;
3043 std::optional<FPValueAndVReg> MaybeCst;
3044 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
3045 return false;
3046
3047 return MaybeCst->Value.isExactlyValue(C);
3048}
3049
3051 unsigned OpIdx) const {
3052 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3053 Register OldReg = MI.getOperand(0).getReg();
3054 Register Replacement = MI.getOperand(OpIdx).getReg();
3055 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3056 replaceRegWith(MRI, OldReg, Replacement);
3057 MI.eraseFromParent();
3058}
3059
3061 Register Replacement) const {
3062 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3063 Register OldReg = MI.getOperand(0).getReg();
3064 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3065 replaceRegWith(MRI, OldReg, Replacement);
3066 MI.eraseFromParent();
3067}
3068
3070 unsigned ConstIdx) const {
3071 Register ConstReg = MI.getOperand(ConstIdx).getReg();
3072 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3073
3074 // Get the shift amount
3075 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3076 if (!VRegAndVal)
3077 return false;
3078
3079 // Return true of shift amount >= Bitwidth
3080 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
3081}
3082
3084 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
3085 MI.getOpcode() == TargetOpcode::G_FSHR) &&
3086 "This is not a funnel shift operation");
3087
3088 Register ConstReg = MI.getOperand(3).getReg();
3089 LLT ConstTy = MRI.getType(ConstReg);
3090 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3091
3092 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3093 assert((VRegAndVal) && "Value is not a constant");
3094
3095 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
3096 APInt NewConst = VRegAndVal->Value.urem(
3097 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3098
3099 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3100 Builder.buildInstr(
3101 MI.getOpcode(), {MI.getOperand(0)},
3102 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3103
3104 MI.eraseFromParent();
3105}
3106
3108 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3109 // Match (cond ? x : x)
3110 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3111 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3112 MRI);
3113}
3114
3116 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3117 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3118 MRI);
3119}
3120
3122 unsigned OpIdx) const {
3123 MachineOperand &MO = MI.getOperand(OpIdx);
3124 return MO.isReg() &&
3125 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3126}
3127
3129 const MachineOperand &MO, bool OrNegative) const {
3130 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT, OrNegative);
3131}
3132
3134 double C) const {
3135 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3136 Builder.buildFConstant(MI.getOperand(0), C);
3137 MI.eraseFromParent();
3138}
3139
3141 int64_t C) const {
3142 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3143 Builder.buildConstant(MI.getOperand(0), C);
3144 MI.eraseFromParent();
3145}
3146
3148 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3149 Builder.buildConstant(MI.getOperand(0), C);
3150 MI.eraseFromParent();
3151}
3152
3154 ConstantFP *CFP) const {
3155 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3156 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3157 MI.eraseFromParent();
3158}
3159
3161 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3162 Builder.buildUndef(MI.getOperand(0));
3163 MI.eraseFromParent();
3164}
3165
3167 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3168 Register LHS = MI.getOperand(1).getReg();
3169 Register RHS = MI.getOperand(2).getReg();
3170 Register &NewLHS = std::get<0>(MatchInfo);
3171 Register &NewRHS = std::get<1>(MatchInfo);
3172
3173 // Helper lambda to check for opportunities for
3174 // ((0-A) + B) -> B - A
3175 // (A + (0-B)) -> A - B
3176 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3177 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3178 return false;
3179 NewLHS = MaybeNewLHS;
3180 return true;
3181 };
3182
3183 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3184}
3185
3187 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3188 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3189 "Invalid opcode");
3190 Register DstReg = MI.getOperand(0).getReg();
3191 LLT DstTy = MRI.getType(DstReg);
3192 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3193
3194 if (DstTy.isScalableVector())
3195 return false;
3196
3197 unsigned NumElts = DstTy.getNumElements();
3198 // If this MI is part of a sequence of insert_vec_elts, then
3199 // don't do the combine in the middle of the sequence.
3200 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3201 TargetOpcode::G_INSERT_VECTOR_ELT)
3202 return false;
3203 MachineInstr *CurrInst = &MI;
3204 MachineInstr *TmpInst;
3205 int64_t IntImm;
3206 Register TmpReg;
3207 MatchInfo.resize(NumElts);
3208 while (mi_match(
3209 CurrInst->getOperand(0).getReg(), MRI,
3210 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3211 if (IntImm >= NumElts || IntImm < 0)
3212 return false;
3213 if (!MatchInfo[IntImm])
3214 MatchInfo[IntImm] = TmpReg;
3215 CurrInst = TmpInst;
3216 }
3217 // Variable index.
3218 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3219 return false;
3220 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3221 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3222 if (!MatchInfo[I - 1].isValid())
3223 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3224 }
3225 return true;
3226 }
3227 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3228 // overwritten, bail out.
3229 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3230 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3231}
3232
3234 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3235 Register UndefReg;
3236 auto GetUndef = [&]() {
3237 if (UndefReg)
3238 return UndefReg;
3239 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3240 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3241 return UndefReg;
3242 };
3243 for (Register &Reg : MatchInfo) {
3244 if (!Reg)
3245 Reg = GetUndef();
3246 }
3247 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3248 MI.eraseFromParent();
3249}
3250
3252 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3253 Register SubLHS, SubRHS;
3254 std::tie(SubLHS, SubRHS) = MatchInfo;
3255 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3256 MI.eraseFromParent();
3257}
3258
3259bool CombinerHelper::matchBinopWithNegInner(Register MInner, Register Other,
3260 unsigned RootOpc, Register Dst,
3261 LLT Ty,
3262 BuildFnTy &MatchInfo) const {
3263 /// Helper function for matchBinopWithNeg: tries to match one commuted form
3264 /// of `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`.
3265 MachineInstr *InnerDef = MRI.getVRegDef(MInner);
3266 if (!InnerDef)
3267 return false;
3268
3269 unsigned InnerOpc = InnerDef->getOpcode();
3270 if (InnerOpc != TargetOpcode::G_ADD && InnerOpc != TargetOpcode::G_SUB)
3271 return false;
3272
3273 if (!MRI.hasOneNonDBGUse(MInner))
3274 return false;
3275
3276 Register InnerLHS = InnerDef->getOperand(1).getReg();
3277 Register InnerRHS = InnerDef->getOperand(2).getReg();
3278 Register NotSrc;
3279 Register B, C;
3280
3281 // Check if either operand is ~b
3282 auto TryMatch = [&](Register MaybeNot, Register Other) {
3283 if (mi_match(MaybeNot, MRI, m_Not(m_Reg(NotSrc)))) {
3284 if (!MRI.hasOneNonDBGUse(MaybeNot))
3285 return false;
3286 B = NotSrc;
3287 C = Other;
3288 return true;
3289 }
3290 return false;
3291 };
3292
3293 if (!TryMatch(InnerLHS, InnerRHS) && !TryMatch(InnerRHS, InnerLHS))
3294 return false;
3295
3296 // Flip add/sub
3297 unsigned FlippedOpc = (InnerOpc == TargetOpcode::G_ADD) ? TargetOpcode::G_SUB
3298 : TargetOpcode::G_ADD;
3299
3300 Register A = Other;
3301 MatchInfo = [=](MachineIRBuilder &Builder) {
3302 auto NewInner = Builder.buildInstr(FlippedOpc, {Ty}, {B, C});
3303 auto NewNot = Builder.buildNot(Ty, NewInner);
3304 Builder.buildInstr(RootOpc, {Dst}, {A, NewNot});
3305 };
3306 return true;
3307}
3308
3310 BuildFnTy &MatchInfo) const {
3311 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
3312 // Root MI is one of G_AND, G_OR, G_XOR.
3313 // We also look for commuted forms of operations. Pattern shouldn't apply
3314 // if there are multiple reasons of inner operations.
3315
3316 unsigned RootOpc = MI.getOpcode();
3317 Register Dst = MI.getOperand(0).getReg();
3318 LLT Ty = MRI.getType(Dst);
3319
3320 Register LHS = MI.getOperand(1).getReg();
3321 Register RHS = MI.getOperand(2).getReg();
3322 // Check the commuted and uncommuted forms of the operation.
3323 return matchBinopWithNegInner(LHS, RHS, RootOpc, Dst, Ty, MatchInfo) ||
3324 matchBinopWithNegInner(RHS, LHS, RootOpc, Dst, Ty, MatchInfo);
3325}
3326
3328 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3329 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3330 //
3331 // Creates the new hand + logic instruction (but does not insert them.)
3332 //
3333 // On success, MatchInfo is populated with the new instructions. These are
3334 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3335 unsigned LogicOpcode = MI.getOpcode();
3336 assert(LogicOpcode == TargetOpcode::G_AND ||
3337 LogicOpcode == TargetOpcode::G_OR ||
3338 LogicOpcode == TargetOpcode::G_XOR);
3339 MachineIRBuilder MIB(MI);
3340 Register Dst = MI.getOperand(0).getReg();
3341 Register LHSReg = MI.getOperand(1).getReg();
3342 Register RHSReg = MI.getOperand(2).getReg();
3343
3344 // Don't recompute anything.
3345 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3346 return false;
3347
3348 // Make sure we have (hand x, ...), (hand y, ...)
3349 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3350 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3351 if (!LeftHandInst || !RightHandInst)
3352 return false;
3353 unsigned HandOpcode = LeftHandInst->getOpcode();
3354 if (HandOpcode != RightHandInst->getOpcode())
3355 return false;
3356 if (LeftHandInst->getNumOperands() < 2 ||
3357 !LeftHandInst->getOperand(1).isReg() ||
3358 RightHandInst->getNumOperands() < 2 ||
3359 !RightHandInst->getOperand(1).isReg())
3360 return false;
3361
3362 // Make sure the types match up, and if we're doing this post-legalization,
3363 // we end up with legal types.
3364 Register X = LeftHandInst->getOperand(1).getReg();
3365 Register Y = RightHandInst->getOperand(1).getReg();
3366 LLT XTy = MRI.getType(X);
3367 LLT YTy = MRI.getType(Y);
3368 if (!XTy.isValid() || XTy != YTy)
3369 return false;
3370
3371 // Optional extra source register.
3372 Register ExtraHandOpSrcReg;
3373 switch (HandOpcode) {
3374 default:
3375 return false;
3376 case TargetOpcode::G_ANYEXT:
3377 case TargetOpcode::G_SEXT:
3378 case TargetOpcode::G_ZEXT: {
3379 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3380 break;
3381 }
3382 case TargetOpcode::G_TRUNC: {
3383 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3384 const MachineFunction *MF = MI.getMF();
3385 LLVMContext &Ctx = MF->getFunction().getContext();
3386
3387 LLT DstTy = MRI.getType(Dst);
3388 const TargetLowering &TLI = getTargetLowering();
3389
3390 // Be extra careful sinking truncate. If it's free, there's no benefit in
3391 // widening a binop.
3392 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3393 return false;
3394 break;
3395 }
3396 case TargetOpcode::G_AND:
3397 case TargetOpcode::G_ASHR:
3398 case TargetOpcode::G_LSHR:
3399 case TargetOpcode::G_SHL: {
3400 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3401 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3402 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3403 return false;
3404 ExtraHandOpSrcReg = ZOp.getReg();
3405 break;
3406 }
3407 }
3408
3409 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3410 return false;
3411
3412 // Record the steps to build the new instructions.
3413 //
3414 // Steps to build (logic x, y)
3415 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3416 OperandBuildSteps LogicBuildSteps = {
3417 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3418 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3419 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3420 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3421
3422 // Steps to build hand (logic x, y), ...z
3423 OperandBuildSteps HandBuildSteps = {
3424 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3425 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3426 if (ExtraHandOpSrcReg.isValid())
3427 HandBuildSteps.push_back(
3428 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3429 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3430
3431 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3432 return true;
3433}
3434
3436 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3437 assert(MatchInfo.InstrsToBuild.size() &&
3438 "Expected at least one instr to build?");
3439 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3440 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3441 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3442 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3443 for (auto &OperandFn : InstrToBuild.OperandFns)
3444 OperandFn(Instr);
3445 }
3446 MI.eraseFromParent();
3447}
3448
3450 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3451 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3452 int64_t ShlCst, AshrCst;
3453 Register Src;
3454 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3455 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3456 m_ICstOrSplat(AshrCst))))
3457 return false;
3458 if (ShlCst != AshrCst)
3459 return false;
3461 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3462 return false;
3463 MatchInfo = std::make_tuple(Src, ShlCst);
3464 return true;
3465}
3466
3468 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3469 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3470 Register Src;
3471 int64_t ShiftAmt;
3472 std::tie(Src, ShiftAmt) = MatchInfo;
3473 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3474 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3475 MI.eraseFromParent();
3476}
3477
3478/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3481 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3482 assert(MI.getOpcode() == TargetOpcode::G_AND);
3483
3484 Register Dst = MI.getOperand(0).getReg();
3485 LLT Ty = MRI.getType(Dst);
3486
3487 Register R;
3488 int64_t C1;
3489 int64_t C2;
3490 if (!mi_match(
3491 Dst, MRI,
3492 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3493 return false;
3494
3495 MatchInfo = [=](MachineIRBuilder &B) {
3496 if (C1 & C2) {
3497 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3498 return;
3499 }
3500 auto Zero = B.buildConstant(Ty, 0);
3501 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3502 };
3503 return true;
3504}
3505
3507 Register &Replacement) const {
3508 // Given
3509 //
3510 // %y:_(sN) = G_SOMETHING
3511 // %x:_(sN) = G_SOMETHING
3512 // %res:_(sN) = G_AND %x, %y
3513 //
3514 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3515 //
3516 // Patterns like this can appear as a result of legalization. E.g.
3517 //
3518 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3519 // %one:_(s32) = G_CONSTANT i32 1
3520 // %and:_(s32) = G_AND %cmp, %one
3521 //
3522 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3523 assert(MI.getOpcode() == TargetOpcode::G_AND);
3524 if (!VT)
3525 return false;
3526
3527 Register AndDst = MI.getOperand(0).getReg();
3528 Register LHS = MI.getOperand(1).getReg();
3529 Register RHS = MI.getOperand(2).getReg();
3530
3531 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3532 // we can't do anything. If we do, then it depends on whether we have
3533 // KnownBits on the LHS.
3534 KnownBits RHSBits = VT->getKnownBits(RHS);
3535 if (RHSBits.isUnknown())
3536 return false;
3537
3538 KnownBits LHSBits = VT->getKnownBits(LHS);
3539
3540 // Check that x & Mask == x.
3541 // x & 1 == x, always
3542 // x & 0 == x, only if x is also 0
3543 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3544 //
3545 // Check if we can replace AndDst with the LHS of the G_AND
3546 if (canReplaceReg(AndDst, LHS, MRI) &&
3547 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3548 Replacement = LHS;
3549 return true;
3550 }
3551
3552 // Check if we can replace AndDst with the RHS of the G_AND
3553 if (canReplaceReg(AndDst, RHS, MRI) &&
3554 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3555 Replacement = RHS;
3556 return true;
3557 }
3558
3559 return false;
3560}
3561
3563 Register &Replacement) const {
3564 // Given
3565 //
3566 // %y:_(sN) = G_SOMETHING
3567 // %x:_(sN) = G_SOMETHING
3568 // %res:_(sN) = G_OR %x, %y
3569 //
3570 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3571 assert(MI.getOpcode() == TargetOpcode::G_OR);
3572 if (!VT)
3573 return false;
3574
3575 Register OrDst = MI.getOperand(0).getReg();
3576 Register LHS = MI.getOperand(1).getReg();
3577 Register RHS = MI.getOperand(2).getReg();
3578
3579 KnownBits LHSBits = VT->getKnownBits(LHS);
3580 KnownBits RHSBits = VT->getKnownBits(RHS);
3581
3582 // Check that x | Mask == x.
3583 // x | 0 == x, always
3584 // x | 1 == x, only if x is also 1
3585 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3586 //
3587 // Check if we can replace OrDst with the LHS of the G_OR
3588 if (canReplaceReg(OrDst, LHS, MRI) &&
3589 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3590 Replacement = LHS;
3591 return true;
3592 }
3593
3594 // Check if we can replace OrDst with the RHS of the G_OR
3595 if (canReplaceReg(OrDst, RHS, MRI) &&
3596 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3597 Replacement = RHS;
3598 return true;
3599 }
3600
3601 return false;
3602}
3603
3605 // If the input is already sign extended, just drop the extension.
3606 Register Src = MI.getOperand(1).getReg();
3607 unsigned ExtBits = MI.getOperand(2).getImm();
3608 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3609 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3610}
3611
3612static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3613 int64_t Cst, bool IsVector, bool IsFP) {
3614 // For i1, Cst will always be -1 regardless of boolean contents.
3615 return (ScalarSizeBits == 1 && Cst == -1) ||
3616 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3617}
3618
3619// This pattern aims to match the following shape to avoid extra mov
3620// instructions
3621// G_BUILD_VECTOR(
3622// G_UNMERGE_VALUES(src, 0)
3623// G_UNMERGE_VALUES(src, 1)
3624// G_IMPLICIT_DEF
3625// G_IMPLICIT_DEF
3626// )
3627// ->
3628// G_CONCAT_VECTORS(
3629// src,
3630// undef
3631// )
3634 Register &UnmergeSrc) const {
3635 auto &BV = cast<GBuildVector>(MI);
3636
3637 unsigned BuildUseCount = BV.getNumSources();
3638 if (BuildUseCount % 2 != 0)
3639 return false;
3640
3641 unsigned NumUnmerge = BuildUseCount / 2;
3642
3643 auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
3644
3645 // Check the first operand is an unmerge and has the correct number of
3646 // operands
3647 if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
3648 return false;
3649
3650 UnmergeSrc = Unmerge->getSourceReg();
3651
3652 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3653 LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
3654
3655 if (!UnmergeSrcTy.isVector())
3656 return false;
3657
3658 // Ensure we only generate legal instructions post-legalizer
3659 if (!IsPreLegalize &&
3660 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
3661 return false;
3662
3663 // Check that all of the operands before the midpoint come from the same
3664 // unmerge and are in the same order as they are used in the build_vector
3665 for (unsigned I = 0; I < NumUnmerge; ++I) {
3666 auto MaybeUnmergeReg = BV.getSourceReg(I);
3667 auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
3668
3669 if (!LoopUnmerge || LoopUnmerge != Unmerge)
3670 return false;
3671
3672 if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
3673 return false;
3674 }
3675
3676 // Check that all of the unmerged values are used
3677 if (Unmerge->getNumDefs() != NumUnmerge)
3678 return false;
3679
3680 // Check that all of the operands after the mid point are undefs.
3681 for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
3682 auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
3683
3684 if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
3685 return false;
3686 }
3687
3688 return true;
3689}
3690
3694 Register &UnmergeSrc) const {
3695 assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
3696 B.setInstrAndDebugLoc(MI);
3697
3698 Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
3699 B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
3700
3701 MI.eraseFromParent();
3702}
3703
3704// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3705// using vector truncates instead
3706//
3707// EXAMPLE:
3708// %a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)
3709// %T_a(i16) = G_TRUNC %a(i32)
3710// %T_b(i16) = G_TRUNC %b(i32)
3711// %Undef(i16) = G_IMPLICIT_DEF(i16)
3712// %dst(v4i16) = G_BUILD_VECTOR %T_a(i16), %T_b(i16), %Undef(i16), %Undef(i16)
3713//
3714// ===>
3715// %Undef(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)
3716// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Undef(<2 x i32>)
3717// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3718//
3719// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3721 Register &MatchInfo) const {
3722 auto BuildMI = cast<GBuildVector>(&MI);
3723 unsigned NumOperands = BuildMI->getNumSources();
3724 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3725
3726 // Check the G_BUILD_VECTOR sources
3727 unsigned I;
3728 MachineInstr *UnmergeMI = nullptr;
3729
3730 // Check all source TRUNCs come from the same UNMERGE instruction
3731 // and that the element order matches (BUILD_VECTOR position I
3732 // corresponds to UNMERGE result I)
3733 for (I = 0; I < NumOperands; ++I) {
3734 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3735 auto SrcMIOpc = SrcMI->getOpcode();
3736
3737 // Check if the G_TRUNC instructions all come from the same MI
3738 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3739 Register TruncSrcReg = SrcMI->getOperand(1).getReg();
3740 if (!UnmergeMI) {
3741 UnmergeMI = MRI.getVRegDef(TruncSrcReg);
3742 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3743 return false;
3744 } else {
3745 auto UnmergeSrcMI = MRI.getVRegDef(TruncSrcReg);
3746 if (UnmergeMI != UnmergeSrcMI)
3747 return false;
3748 }
3749 // Verify element ordering: BUILD_VECTOR position I must use
3750 // UNMERGE result I, otherwise the fold would lose element reordering
3751 if (UnmergeMI->getOperand(I).getReg() != TruncSrcReg)
3752 return false;
3753 } else {
3754 break;
3755 }
3756 }
3757 if (I < 2)
3758 return false;
3759
3760 // Check the remaining source elements are only G_IMPLICIT_DEF
3761 for (; I < NumOperands; ++I) {
3762 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3763 auto SrcMIOpc = SrcMI->getOpcode();
3764
3765 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3766 return false;
3767 }
3768
3769 // Check the size of unmerge source
3770 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3771 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3772 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3773 return false;
3774
3775 // Check the unmerge source and destination element types match
3776 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3777 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3778 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3779 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3780 return false;
3781
3782 // Only generate legal instructions post-legalizer
3783 if (!IsPreLegalize) {
3784 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3785
3786 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3787 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3788 return false;
3789
3790 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3791 return false;
3792 }
3793
3794 return true;
3795}
3796
3798 Register &MatchInfo) const {
3799 Register MidReg;
3800 auto BuildMI = cast<GBuildVector>(&MI);
3801 Register DstReg = BuildMI->getReg(0);
3802 LLT DstTy = MRI.getType(DstReg);
3803 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3804 unsigned DstTyNumElt = DstTy.getNumElements();
3805 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3806
3807 // No need to pad vector if only G_TRUNC is needed
3808 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3809 MidReg = MatchInfo;
3810 } else {
3811 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3812 SmallVector<Register> ConcatRegs = {MatchInfo};
3813 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3814 ConcatRegs.push_back(UndefReg);
3815
3816 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3817 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3818 }
3819
3820 Builder.buildTrunc(DstReg, MidReg);
3821 MI.eraseFromParent();
3822}
3823
3825 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably CombinerHelper::matchNotCmp -- confirm upstream.
//
// Matches a G_XOR whose first source is the root of a tree made only of
// G_ICMP / G_FCMP leaves joined by G_AND / G_OR, and whose second source is a
// constant accepted by isConstValidTrue. Every register of that tree is
// appended to RegsToNegate. Registers are appended before all checks have
// passed, so the contents are only meaningful when this returns true.
3826 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3827 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3828 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3829 Register XorSrc;
3830 Register CstReg;
3831 // We match xor(src, true) here.
3832 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3833 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3834 return false;
3835
3836 if (!MRI.hasOneNonDBGUse(XorSrc))
3837 return false;
3838
3839 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3840 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3841 // work list of tree nodes to visit.
3842 RegsToNegate.push_back(XorSrc);
3843 // Remember whether the comparisons are all integer or all floating point.
3844 bool IsInt = false;
3845 bool IsFP = false;
3846 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3847 Register Reg = RegsToNegate[I];
// Every node must have a single non-debug use.
3848 if (!MRI.hasOneNonDBGUse(Reg))
3849 return false;
3850 MachineInstr *Def = MRI.getVRegDef(Reg);
3851 switch (Def->getOpcode()) {
3852 default:
3853 // Don't match if the tree contains anything other than ANDs, ORs and
3854 // comparisons.
3855 return false;
3856 case TargetOpcode::G_ICMP:
// Mixing integer and floating-point comparisons is rejected.
3857 if (IsFP)
3858 return false;
3859 IsInt = true;
3860 // When we apply the combine we will invert the predicate.
3861 break;
3862 case TargetOpcode::G_FCMP:
3863 if (IsInt)
3864 return false;
3865 IsFP = true;
3866 // When we apply the combine we will invert the predicate.
3867 break;
3868 case TargetOpcode::G_AND:
3869 case TargetOpcode::G_OR:
3870 // Implement De Morgan's laws:
3871 // ~(x & y) -> ~x | ~y
3872 // ~(x | y) -> ~x & ~y
3873 // When we apply the combine we will change the opcode and recursively
3874 // negate the operands.
3875 RegsToNegate.push_back(Def->getOperand(1).getReg());
3876 RegsToNegate.push_back(Def->getOperand(2).getReg());
3877 break;
3878 }
3879 }
3880
3881 // Now we know whether the comparisons are integer or floating point, check
3882 // the constant in the xor.
3883 int64_t Cst;
3884 if (Ty.isVector()) {
// Vector case: the constant must be a splat; it is checked against the scalar
// element width.
3885 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3886 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3887 if (!MaybeCst)
3888 return false;
3889 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3890 return false;
3891 } else {
// Scalar case: the constant must be a plain integer constant.
3892 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3893 return false;
3894 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3895 return false;
3896 }
3897
3898 return true;
3899}
3900
3902 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably CombinerHelper::applyNotCmp -- confirm upstream.
//
// Negates, in place, the defining instruction of every register in
// RegsToNegate: comparisons get a new predicate, G_AND becomes G_OR and G_OR
// becomes G_AND. Afterwards the uses of MI's result are redirected to MI's
// first source and MI is erased.
3903 for (Register Reg : RegsToNegate) {
3904 MachineInstr *Def = MRI.getVRegDef(Reg);
3905 Observer.changingInstr(*Def);
3906 // For each comparison, invert the opcode. For each AND and OR, change the
3907 // opcode.
3908 switch (Def->getOpcode()) {
3909 default:
3910 llvm_unreachable("Unexpected opcode");
3911 case TargetOpcode::G_ICMP:
3912 case TargetOpcode::G_FCMP: {
3913 MachineOperand &PredOp = Def->getOperand(1);
// NOTE(review): the definition of NewP is not visible in this listing (the
// lines between 3913 and 3916 were dropped); presumably it is the inverse of
// the predicate currently held in PredOp -- confirm upstream.
3916 PredOp.setPredicate(NewP);
3917 break;
3918 }
3919 case TargetOpcode::G_AND:
3920 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3921 break;
3922 case TargetOpcode::G_OR:
3923 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3924 break;
3925 }
3926 Observer.changedInstr(*Def);
3927 }
3928
// The tree now computes the negated value itself, so the xor is redundant.
3929 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3930 MI.eraseFromParent();
3931}
3932
3934 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably CombinerHelper::matchXorOfAndWithSameReg -- confirm.
//
// On success MatchInfo.first is the G_AND operand that is not shared with the
// G_XOR (x) and MatchInfo.second is the shared register (y). MatchInfo is
// written through references while matching, so it may be modified even when
// this returns false.
3935 // Match (xor (and x, y), y) (or any of its commuted cases)
3936 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3937 Register &X = MatchInfo.first;
3938 Register &Y = MatchInfo.second;
3939 Register AndReg = MI.getOperand(1).getReg();
3940 Register SharedReg = MI.getOperand(2).getReg();
3941
3942 // Find a G_AND on either side of the G_XOR.
3943 // Look for one of
3944 //
3945 // (xor (and x, y), SharedReg)
3946 // (xor SharedReg, (and x, y))
3947 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3948 std::swap(AndReg, SharedReg);
3949 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3950 return false;
3951 }
3952
3953 // Only do this if we'll eliminate the G_AND.
3954 if (!MRI.hasOneNonDBGUse(AndReg))
3955 return false;
3956
3957 // We can combine if SharedReg is the same as either the LHS or RHS of the
3958 // G_AND.
// Normalize so that Y is the shared operand; if neither operand matches, the
// comparison below fails.
3959 if (Y != SharedReg)
3960 std::swap(X, Y);
3961 return Y == SharedReg;
3962}
3963
3965 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably CombinerHelper::applyXorOfAndWithSameReg -- confirm.
//
// Mutates MI in place from G_XOR to G_AND instead of building a new
// instruction; its sources become a freshly built (not x) and y.
3966 // Fold (xor (and x, y), y) -> (and (not x), y)
3967 Register X, Y;
3968 std::tie(X, Y) = MatchInfo;
3969 auto Not = Builder.buildNot(MRI.getType(X), X);
3970 Observer.changingInstr(MI);
3971 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3972 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3973 MI.getOperand(2).setReg(Y);
3974 Observer.changedInstr(MI);
3975}
3976
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::matchPtrAddZero(MachineInstr &MI) --
// confirm upstream.
//
// Returns true when the base operand of the G_PTR_ADD in MI is zero: a
// constant 0 for a pointer result, or an all-zeros build vector for a vector
// result. Never matches when the result's address space is non-integral.
3978 auto &PtrAdd = cast<GPtrAdd>(MI);
3979 Register DstReg = PtrAdd.getReg(0);
3980 LLT Ty = MRI.getType(DstReg);
3981 const DataLayout &DL = Builder.getMF().getDataLayout();
3982
3983 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3984 return false;
3985
3986 if (Ty.isPointer()) {
3987 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3988 return ConstVal && *ConstVal == 0;
3989 }
3990
3991 assert(Ty.isVector() && "Expecting a vector type");
3992 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3993 return isBuildVectorAllZeros(*VecMI, MRI);
3994}
3995
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::applyPtrAddZero(MachineInstr &MI) --
// confirm upstream.
//
// Replaces the G_PTR_ADD in MI with a G_INTTOPTR of its offset operand,
// written to the same destination register, then erases the G_PTR_ADD.
3997 auto &PtrAdd = cast<GPtrAdd>(MI);
3998 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3999 PtrAdd.eraseFromParent();
4000}
4001
4002/// The second source operand is known to be a power of 2.
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::applySimplifyURemByPow2(MachineInstr
// &MI) -- confirm upstream.
4004 Register DstReg = MI.getOperand(0).getReg();
4005 Register Src0 = MI.getOperand(1).getReg();
4006 Register Pow2Src1 = MI.getOperand(2).getReg();
4007 LLT Ty = MRI.getType(DstReg);
4008
4009 // Fold (urem x, pow2) -> (and x, pow2-1)
// pow2-1 is built as an add of the constant -1, so the divisor itself does
// not have to be a compile-time constant.
4010 auto NegOne = Builder.buildConstant(Ty, -1);
4011 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
4012 Builder.buildAnd(DstReg, Src0, Add);
4013 MI.eraseFromParent();
4014}
4015
4017 unsigned &SelectOpNo) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably CombinerHelper::matchFoldBinOpIntoSelect -- confirm.
//
// Matches a binary operator in MI one of whose sources is a single-use
// G_SELECT with constant (or constant-vector) true/false values. The operand
// index of that select within MI (1 or 2) is stored in SelectOpNo; it is
// written before the checks complete, so it is only meaningful on success.
4018 Register LHS = MI.getOperand(1).getReg();
4019 Register RHS = MI.getOperand(2).getReg();
4020
// Try the LHS as the select first.
4021 Register OtherOperandReg = RHS;
4022 SelectOpNo = 1;
4023 MachineInstr *Select = MRI.getVRegDef(LHS);
4024
4025 // Don't do this unless the old select is going away. We want to eliminate the
4026 // binary operator, not replace a binop with a select.
4027 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
4028 !MRI.hasOneNonDBGUse(LHS)) {
// Fall back to the RHS.
4029 OtherOperandReg = LHS;
4030 SelectOpNo = 2;
4031 Select = MRI.getVRegDef(RHS);
4032 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
4033 !MRI.hasOneNonDBGUse(RHS))
4034 return false;
4035 }
4036
// Operands 2 and 3 of the select are its true and false values.
4037 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
4038 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
4039
4040 if (!isConstantOrConstantVector(*SelectLHS, MRI,
4041 /*AllowFP*/ true,
4042 /*AllowOpaqueConstants*/ false))
4043 return false;
4044 if (!isConstantOrConstantVector(*SelectRHS, MRI,
4045 /*AllowFP*/ true,
4046 /*AllowOpaqueConstants*/ false))
4047 return false;
4048
4049 unsigned BinOpcode = MI.getOpcode();
4050
4051 // We know that one of the operands is a select of constants. Now verify that
4052 // the other binary operator operand is either a constant, or we can handle a
4053 // variable.
// A variable other operand is accepted only for G_AND / G_OR when both select
// arms are zero or all-ones (or splats of those).
4054 bool CanFoldNonConst =
4055 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
4056 (isNullOrNullSplat(*SelectLHS, MRI) ||
4057 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
4058 (isNullOrNullSplat(*SelectRHS, MRI) ||
4059 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
4060 if (CanFoldNonConst)
4061 return true;
4062
4063 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
4064 /*AllowFP*/ true,
4065 /*AllowOpaqueConstants*/ false);
4066}
4067
4068/// \p SelectOperand is the operand in binary operator \p MI that is the select
4069/// to fold.
// NOTE(review): the first line of this signature is missing from this
// listing; presumably CombinerHelper::applyFoldBinOpIntoSelect -- confirm.
4071 MachineInstr &MI, const unsigned &SelectOperand) const {
4072 Register Dst = MI.getOperand(0).getReg();
4073 Register LHS = MI.getOperand(1).getReg();
4074 Register RHS = MI.getOperand(2).getReg();
4075 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
4076
4077 Register SelectCond = Select->getOperand(1).getReg();
4078 Register SelectTrue = Select->getOperand(2).getReg();
4079 Register SelectFalse = Select->getOperand(3).getReg();
4080
4081 LLT Ty = MRI.getType(Dst);
4082 unsigned BinOpcode = MI.getOpcode();
4083
4084 Register FoldTrue, FoldFalse;
4085
4086 // We have a select-of-constants followed by a binary operator with a
4087 // constant. Eliminate the binop by pulling the constant math into the select.
4088 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
// The select arm keeps the operand position the select had in MI, so operand
// order is preserved for non-commutative operators.
4089 if (SelectOperand == 1) {
4090 // TODO: SelectionDAG verifies this actually constant folds before
4091 // committing to the combine.
4092
4093 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
4094 FoldFalse =
4095 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
4096 } else {
4097 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
4098 FoldFalse =
4099 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
4100 }
4101
// The new select writes MI's destination and carries MI's flags.
4102 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
4103 MI.eraseFromParent();
4104}
4105
// Walks the tree of G_ORs rooted at Root and collects every non-G_OR leaf
// register. Returns std::nullopt if any visited operand has more than one
// non-debug use, or if the number of collected leaves is zero or odd.
4106std::optional<SmallVector<Register, 8>>
4107CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
4108 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
4109 // We want to detect if Root is part of a tree which represents a bunch
4110 // of loads being merged into a larger load. We'll try to recognize patterns
4111 // like, for example:
4112 //
4113 //  Reg   Reg
4114 //   \    /
4115 //    OR_1   Reg
4116 //     \    /
4117 //      OR_2
4118 //        \     Reg
4119 //         .. /
4120 //        Root
4121 //
4122 //  Reg   Reg   Reg   Reg
4123 //     \ /       \   /
4124 //     OR_1      OR_2
4125 //       \       /
4126 //        \    /
4127 //         ...
4128 //         Root
4129 //
4130 // Each "Reg" may have been produced by a load + some arithmetic. This
4131 // function will save each of them.
4132 SmallVector<Register, 8> RegsToVisit;
// NOTE(review): the declaration of the worklist `Ors` is not visible in this
// listing (line 4133 was dropped); presumably it is seeded with Root --
// confirm upstream.
4134
4135 // In the "worst" case, we're dealing with a load for each byte. So, there
4136 // are at most #bytes - 1 ORs.
4137 const unsigned MaxIter =
4138 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
4139 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
4140 if (Ors.empty())
4141 break;
4142 const MachineInstr *Curr = Ors.pop_back_val();
4143 Register OrLHS = Curr->getOperand(1).getReg();
4144 Register OrRHS = Curr->getOperand(2).getReg();
4145
4146 // In the combine, we want to eliminate the entire tree.
4147 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
4148 return std::nullopt;
4149
4150 // If it's a G_OR, save it and continue to walk. If it's not, then it's
4151 // something that may be a load + arithmetic.
4152 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
4153 Ors.push_back(Or);
4154 else
4155 RegsToVisit.push_back(OrLHS);
4156 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
4157 Ors.push_back(Or);
4158 else
4159 RegsToVisit.push_back(OrRHS);
4160 }
4161
4162 // We're going to try and merge each register into a wider power-of-2 type,
4163 // so we ought to have an even number of registers.
4164 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
4165 return std::nullopt;
4166 return RegsToVisit;
4167}
4168
4169/// Helper function for findLoadOffsetsForLoadOrCombine.
4170///
4171/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
4172/// and then moving that value into a specific byte offset.
4173///
4174/// e.g. x[i] << 24
4175///
4176/// \returns The load instruction and the byte offset it is moved into.
4177static std::optional<std::pair<GZExtLoad *, int64_t>>
4178matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
4179 const MachineRegisterInfo &MRI) {
4180 assert(MRI.hasOneNonDBGUse(Reg) &&
4181 "Expected Reg to only have one non-debug use?");
4182 Register MaybeLoad;
4183 int64_t Shift;
4184 if (!mi_match(Reg, MRI,
4185 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
4186 Shift = 0;
4187 MaybeLoad = Reg;
4188 }
4189
4190 if (Shift % MemSizeInBits != 0)
4191 return std::nullopt;
4192
4193 // TODO: Handle other types of loads.
4194 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
4195 if (!Load)
4196 return std::nullopt;
4197
4198 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4199 return std::nullopt;
4200
4201 return std::make_pair(Load, Shift / MemSizeInBits);
4202}
4203
// For every register in RegsToVisit, finds the narrow load that produces it
// and the position it is shifted into, and records position -> load index in
// MemOffset2Idx. Returns (load with the lowest index, that index, latest load
// in instruction order), or std::nullopt as soon as any check fails.
//
// NOTE(review): the first parameter line (line 4206) is missing from this
// listing; the body uses it as `MemOffset2Idx` -- confirm its type upstream.
4204std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4205CombinerHelper::findLoadOffsetsForLoadOrCombine(
4207 const SmallVector<Register, 8> &RegsToVisit,
4208 const unsigned MemSizeInBits) const {
4209
4210 // Each load found for the pattern. There should be one for each RegsToVisit.
4211 SmallSetVector<const MachineInstr *, 8> Loads;
4212
4213 // The lowest index used in any load. (The lowest "i" for each x[i].)
4214 int64_t LowestIdx = INT64_MAX;
4215
4216 // The load which uses the lowest index.
4217 GZExtLoad *LowestIdxLoad = nullptr;
4218
4219 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4220 SmallSet<int64_t, 8> SeenIdx;
4221
4222 // Ensure each load is in the same MBB.
4223 // TODO: Support multiple MachineBasicBlocks.
4224 MachineBasicBlock *MBB = nullptr;
4225 const MachineMemOperand *MMO = nullptr;
4226
4227 // Earliest instruction-order load in the pattern.
4228 GZExtLoad *EarliestLoad = nullptr;
4229
4230 // Latest instruction-order load in the pattern.
4231 GZExtLoad *LatestLoad = nullptr;
4232
4233 // Base pointer which every load should share.
// NOTE(review): the declaration of `BasePtr` is not visible in this listing
// (line 4234 was dropped); the code below treats it as a Register that starts
// out invalid -- confirm upstream.
4235
4236 // We want to find a load for each register. Each load should have some
4237 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4238 // track of the load which uses the lowest index. Later, we will check if we
4239 // can use its pointer in the final, combined load.
4240 for (auto Reg : RegsToVisit) {
4241 // Find the load, and find the position that it will end up in (e.g. a
4242 // shifted) value.
4243 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4244 if (!LoadAndPos)
4245 return std::nullopt;
4246 GZExtLoad *Load;
4247 int64_t DstPos;
4248 std::tie(Load, DstPos) = *LoadAndPos;
4249
4250 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4251 // it is difficult to check for stores/calls/etc between loads.
4252 MachineBasicBlock *LoadMBB = Load->getParent();
4253 if (!MBB)
4254 MBB = LoadMBB;
4255 if (LoadMBB != MBB)
4256 return std::nullopt;
4257
4258 // Make sure that the MachineMemOperands of every seen load are compatible.
// Only the address space is compared; the first load seen sets the reference.
4259 auto &LoadMMO = Load->getMMO();
4260 if (!MMO)
4261 MMO = &LoadMMO;
4262 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4263 return std::nullopt;
4264
4265 // Find out what the base pointer and index for the load is.
// A pointer that is not (G_PTR_ADD base, constant) is its own base, index 0.
4266 Register LoadPtr;
4267 int64_t Idx;
4268 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4269 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4270 LoadPtr = Load->getOperand(1).getReg();
4271 Idx = 0;
4272 }
4273
4274 // Don't combine things like a[i], a[i] -> a bigger load.
4275 if (!SeenIdx.insert(Idx).second)
4276 return std::nullopt;
4277
4278 // Every load must share the same base pointer; don't combine things like:
4279 //
4280 // a[i], b[i + 1] -> a bigger load.
4281 if (!BasePtr.isValid())
4282 BasePtr = LoadPtr;
4283 if (BasePtr != LoadPtr)
4284 return std::nullopt;
4285
4286 if (Idx < LowestIdx) {
4287 LowestIdx = Idx;
4288 LowestIdxLoad = Load;
4289 }
4290
4291 // Keep track of the byte offset that this load ends up at. If we have seen
4292 // the byte offset, then stop here. We do not want to combine:
4293 //
4294 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4295 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4296 return std::nullopt;
4297 Loads.insert(Load);
4298
4299 // Keep track of the position of the earliest/latest loads in the pattern.
4300 // We will check that there are no load fold barriers between them later
4301 // on.
4302 //
4303 // FIXME: Is there a better way to check for load fold barriers?
4304 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4305 EarliestLoad = Load;
4306 if (!LatestLoad || dominates(*LatestLoad, *Load))
4307 LatestLoad = Load;
4308 }
4309
4310 // We found a load for each register. Let's check if each load satisfies the
4311 // pattern.
4312 assert(Loads.size() == RegsToVisit.size() &&
4313 "Expected to find a load for each register?");
4314 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4315 LatestLoad && "Expected at least two loads?");
4316
4317 // Check if there are any stores, calls, etc. between any of the loads. If
4318 // there are, then we can't safely perform the combine.
4319 //
4320 // MaxIter is chosen based off the (worst case) number of iterations it
4321 // typically takes to succeed in the LLVM test suite plus some padding.
4322 //
4323 // FIXME: Is there a better way to check for load fold barriers?
4324 const unsigned MaxIter = 20;
4325 unsigned Iter = 0;
// Loads that belong to the pattern are skipped and do not count towards
// MaxIter.
4326 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4327 LatestLoad->getIterator())) {
4328 if (Loads.count(&MI))
4329 continue;
4330 if (MI.isLoadFoldBarrier())
4331 return std::nullopt;
4332 if (Iter++ == MaxIter)
4333 return std::nullopt;
4334 }
4335
4336 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4337}
4338
4341 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the first lines of this signature are missing from this
// listing; presumably CombinerHelper::matchLoadOrCombine(MachineInstr &MI,
// ...) -- confirm upstream.
//
// Matches a scalar G_OR tree that assembles a wide value from narrow
// zero-extending loads. On success MatchInfo is set to a builder callback
// that emits one wide load (plus a G_BSWAP when the matched byte order
// differs from the target's) at the position of the latest narrow load.
4342 assert(MI.getOpcode() == TargetOpcode::G_OR);
4343 MachineFunction &MF = *MI.getMF();
4344 // Assuming a little-endian target, transform:
4345 // s8 *a = ...
4346 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4347 // =>
4348 // s32 val = *((i32)a)
4349 //
4350 // s8 *a = ...
4351 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4352 // =>
4353 // s32 val = BSWAP(*((s32)a))
4354 Register Dst = MI.getOperand(0).getReg();
4355 LLT Ty = MRI.getType(Dst);
4356 if (Ty.isVector())
4357 return false;
4358
4359 // We need to combine at least two loads into this type. Since the smallest
4360 // possible load is into a byte, we need at least a 16-bit wide type.
4361 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4362 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4363 return false;
4364
4365 // Match a collection of non-OR instructions in the pattern.
4366 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4367 if (!RegsToVisit)
4368 return false;
4369
4370 // We have a collection of non-OR instructions. Figure out how wide each of
4371 // the small loads should be based off of the number of potential loads we
4372 // found.
4373 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4374 if (NarrowMemSizeInBits % 8 != 0)
4375 return false;
4376
4377 // Check if each register feeding into each OR is a load from the same
4378 // base pointer + some arithmetic.
4379 //
4380 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4381 //
4382 // Also verify that each of these ends up putting a[i] into the same memory
4383 // offset as a load into a wide type would.
// NOTE(review): the declaration of `MemOffset2Idx` is not visible in this
// listing (line 4384 was dropped) -- confirm its type upstream.
4385 GZExtLoad *LowestIdxLoad, *LatestLoad;
4386 int64_t LowestIdx;
4387 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4388 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4389 if (!MaybeLoadInfo)
4390 return false;
4391 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4392
4393 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4394 // we found before, check if this corresponds to a big or little endian byte
4395 // pattern. If it does, then we can represent it using a load + possibly a
4396 // BSWAP.
4397 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4398 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4399 if (!IsBigEndian)
4400 return false;
4401 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4402 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4403 return false;
4404
4405 // Make sure that the load from the lowest index produces offset 0 in the
4406 // final value.
4407 //
4408 // This ensures that we won't combine something like this:
4409 //
4410 // load x[i] -> byte 2
4411 // load x[i+1] -> byte 0 ---> wide_load x[i]
4412 // load x[i+2] -> byte 1
4413 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4414 const unsigned ZeroByteOffset =
4415 *IsBigEndian
4416 ? bigEndianByteAt(NumLoadsInTy, 0)
4417 : littleEndianByteAt(NumLoadsInTy, 0);
4418 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4419 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4420 ZeroOffsetIdx->second != LowestIdx)
4421 return false;
4422
4423 // We will reuse the pointer from the load which ends up at byte offset 0. It
4424 // may not use index 0.
4425 Register Ptr = LowestIdxLoad->getPointerReg();
4426 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
4427 LegalityQuery::MemDesc MMDesc(MMO);
4428 MMDesc.MemoryTy = Ty;
// NOTE(review): the opening line of the legality check is not visible in this
// listing (line 4429 was dropped); the visible remainder is a query for a
// G_LOAD of Ty through Ptr with the widened memory descriptor -- confirm
// which legality helper is called upstream.
4430 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4431 return false;
4432 auto PtrInfo = MMO.getPointerInfo();
4433 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4434
4435 // Load must be allowed and fast on the target.
// NOTE(review): the declaration of `C` is not visible in this listing (line
// 4436 was dropped); presumably the LLVMContext -- confirm upstream.
4437 auto &DL = MF.getDataLayout();
4438 unsigned Fast = 0;
4439 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4440 !Fast)
4441 return false;
4442
// The callback captures by copy. When a byte swap is needed the load goes to
// a clone of Dst and the swap writes Dst.
4443 MatchInfo = [=](MachineIRBuilder &MIB) {
4444 MIB.setInstrAndDebugLoc(*LatestLoad);
4445 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4446 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4447 if (NeedsBSwap)
4448 MIB.buildBSwap(Dst, LoadDst);
4449 };
4450 return true;
4451}
4452
4454 MachineInstr *&ExtMI) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably CombinerHelper::matchExtendThroughPhis(MachineInstr
// &MI, ...) -- confirm upstream.
//
// Matches a scalar G_PHI whose only non-debug user is an extend, and stores
// that user in ExtMI. ExtMI is assigned before the user's opcode is checked,
// so it is only meaningful when this returns true.
4455 auto &PHI = cast<GPhi>(MI);
4456 Register DstReg = PHI.getReg(0);
4457
4458 // TODO: Extending a vector may be expensive, don't do this until heuristics
4459 // are better.
4460 if (MRI.getType(DstReg).isVector())
4461 return false;
4462
4463 // Try to match a phi, whose only use is an extend.
4464 if (!MRI.hasOneNonDBGUse(DstReg))
4465 return false;
4466 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4467 switch (ExtMI->getOpcode()) {
4468 case TargetOpcode::G_ANYEXT:
4469 return true; // G_ANYEXT is usually free.
4470 case TargetOpcode::G_ZEXT:
4471 case TargetOpcode::G_SEXT:
4472 break;
4473 default:
4474 return false;
4475 }
4476
4477 // If the target is likely to fold this extend away, don't propagate.
4478 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4479 return false;
4480
4481 // We don't want to propagate the extends unless there's a good chance that
4482 // they'll be optimized in some way.
4483 // Collect the unique incoming values.
// NOTE(review): the declaration of the set `InSrcs` is not visible in this
// listing (line 4484 was dropped) -- confirm its type upstream.
4485 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4486 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4487 switch (DefMI->getOpcode()) {
4488 case TargetOpcode::G_LOAD:
4489 case TargetOpcode::G_TRUNC:
4490 case TargetOpcode::G_SEXT:
4491 case TargetOpcode::G_ZEXT:
4492 case TargetOpcode::G_ANYEXT:
4493 case TargetOpcode::G_CONSTANT:
4494 InSrcs.insert(DefMI);
4495 // Don't try to propagate if there are too many places to create new
4496 // extends, chances are it'll increase code size.
4497 if (InSrcs.size() > 2)
4498 return false;
4499 break;
4500 default:
// Any other kind of incoming definition blocks the combine.
4501 return false;
4502 }
4503 }
4504 return true;
4505}
4506
4508 MachineInstr *&ExtMI) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably CombinerHelper::applyExtendThroughPhis(MachineInstr
// &MI, ...) -- confirm upstream.
//
// Builds an extend (same opcode as ExtMI) after the definition of every
// incoming value of the phi MI, then builds a new G_PHI over the extended
// values that defines ExtMI's result, and erases ExtMI.
4509 auto &PHI = cast<GPhi>(MI);
4510 Register DstReg = ExtMI->getOperand(0).getReg();
4511 LLT ExtTy = MRI.getType(DstReg);
4512
4513 // Propagate the extension into the block of each incoming reg's block.
4514 // Use a SetVector here because PHIs can have duplicate edges, and we want
4515 // deterministic iteration order.
// NOTE(review): the declarations of `SrcMIs` and `OldToNewSrcMap` are not
// visible in this listing (lines 4516-4517 were dropped) -- confirm their
// types upstream.
4518 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4519 auto SrcReg = PHI.getIncomingValue(I);
4520 auto *SrcMI = MRI.getVRegDef(SrcReg);
// Each defining instruction is extended only once.
4521 if (!SrcMIs.insert(SrcMI))
4522 continue;
4523
4524 // Build an extend after each src inst.
4525 auto *MBB = SrcMI->getParent();
4526 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
// If the next instruction is a phi, move the insertion point to the block's
// first non-phi instruction.
4527 if (InsertPt != MBB->end() && InsertPt->isPHI())
4528 InsertPt = MBB->getFirstNonPHI();
4529
4530 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4531 Builder.setDebugLoc(MI.getDebugLoc());
4532 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4533 OldToNewSrcMap[SrcMI] = NewExt;
4534 }
4535
4536 // Create a new phi with the extended inputs.
4537 Builder.setInstrAndDebugLoc(MI);
4538 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4539 NewPhi.addDef(DstReg);
// Copy MI's operands after the def: block operands unchanged, register
// operands replaced by their extended counterparts.
4540 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4541 if (!MO.isReg()) {
4542 NewPhi.addMBB(MO.getMBB());
4543 continue;
4544 }
4545 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4546 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4547 }
4548 Builder.insertInstr(NewPhi);
4549 ExtMI->eraseFromParent();
4550}
4551
4553 Register &Reg) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably CombinerHelper::matchExtractVecEltBuildVec(MachineInstr
// &MI, ...) -- confirm upstream.
//
// Matches a G_EXTRACT_VECTOR_ELT with an in-range constant index whose source
// is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC (optionally behind a G_TRUNC).
// On success Reg is the build-vector operand selected by the index.
4554 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4555 // If we have a constant index, look for a G_BUILD_VECTOR source
4556 // and find the source register that the index maps to.
4557 Register SrcVec = MI.getOperand(1).getReg();
4558 LLT SrcTy = MRI.getType(SrcVec);
4559 if (SrcTy.isScalableVector())
4560 return false;
4561
4562 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4563 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4564 return false;
4565
4566 unsigned VecIdx = Cst->Value.getZExtValue();
4567
4568 // Check if we have a build_vector or build_vector_trunc with an optional
4569 // trunc in front.
4570 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4571 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4572 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4573 }
4574
4575 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4576 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4577 return false;
4578
// A source vector with more than one non-debug use is accepted only if the
// target opts in through aggressivelyPreferBuildVectorSources.
4579 EVT Ty(getMVTForLLT(SrcTy));
4580 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4581 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4582 return false;
4583
// Operand 0 is the build vector's def, so element VecIdx is operand VecIdx+1.
4584 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4585 return true;
4586}
4587
4589 Register &Reg) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably CombinerHelper::applyExtractVecEltBuildVec(MachineInstr
// &MI, ...) -- confirm upstream.
4590 // Check the type of the register, since it may have come from a
4591 // G_BUILD_VECTOR_TRUNC.
4592 LLT ScalarTy = MRI.getType(Reg);
4593 Register DstReg = MI.getOperand(0).getReg();
4594 LLT DstTy = MRI.getType(DstReg);
4595
// Differing types: Reg is asserted to be wider and is truncated into DstReg.
4596 if (ScalarTy != DstTy) {
4597 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4598 Builder.buildTrunc(DstReg, Reg);
4599 MI.eraseFromParent();
4600 return;
4601 }
// NOTE(review): the statement that handles the equal-type case is not visible
// in this listing (line 4602 was dropped); presumably it replaces MI's result
// with Reg -- confirm upstream.
4603}
4604
4607 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
// NOTE(review): the first lines of this signature are missing from this
// listing; presumably CombinerHelper::matchExtractAllEltsFromBuildVector
// (MachineInstr &MI, ...) -- confirm upstream.
//
// On success SrcDstPairs holds, for every user of the build vector, the
// build-vector source register paired with the extract that reads it. Pairs
// are appended while scanning, so the contents are only meaningful when this
// returns true.
4608 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4609 // This combine tries to find build_vector's which have every source element
4610 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4611 // the masked load scalarization is run late in the pipeline. There's already
4612 // a combine for a similar pattern starting from the extract, but that
4613 // doesn't attempt to do it if there are multiple uses of the build_vector,
4614 // which in this case is true. Starting the combine from the build_vector
4615 // feels more natural than trying to find sibling nodes of extracts.
4616 // E.g.
4617 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4618 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4619 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4620 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4621 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4622 // ==>
4623 // replace ext{1,2,3,4} with %s{1,2,3,4}
4624
4625 Register DstReg = MI.getOperand(0).getReg();
4626 LLT DstTy = MRI.getType(DstReg);
4627 unsigned NumElts = DstTy.getNumElements();
4628
// One bit per element, set once some extract reads that element.
4629 SmallBitVector ExtractedElts(NumElts);
4630 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
// Every non-debug user must be an extract with a constant, in-range index.
4631 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4632 return false;
4633 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4634 if (!Cst)
4635 return false;
4636 unsigned Idx = Cst->getZExtValue();
4637 if (Idx >= NumElts)
4638 return false; // Out of range.
4639 ExtractedElts.set(Idx);
4640 SrcDstPairs.emplace_back(
4641 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4642 }
4643 // Match if every element was extracted.
4644 return ExtractedElts.all();
4645}
4646
4649 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
// NOTE(review): the first lines of this signature are missing from this
// listing; presumably CombinerHelper::applyExtractAllEltsFromBuildVector
// (MachineInstr &MI, ...) -- confirm upstream.
//
// For every (source register, extract) pair, replaces the extract's result
// with the source register and erases the extract; finally erases the build
// vector MI itself.
4650 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4651 for (auto &Pair : SrcDstPairs) {
4652 auto *ExtMI = Pair.second;
4653 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4654 ExtMI->eraseFromParent();
4655 }
4656 MI.eraseFromParent();
4657}
4658
4661 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the first lines of this signature are missing from this
// listing; presumably CombinerHelper::applyBuildFn(MachineInstr &MI, ...) --
// confirm upstream.
//
// Runs the build callback via applyBuildFnNoErase, then erases MI.
4662 applyBuildFnNoErase(MI, MatchInfo);
4663 MI.eraseFromParent();
4664}
4665
4668 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the first lines of this signature are missing from this
// listing; presumably CombinerHelper::applyBuildFnNoErase(MachineInstr &MI,
// ...) -- confirm upstream.
//
// Invokes the build callback with this helper's Builder. Nothing is erased
// here.
4669 MatchInfo(Builder);
4670}
4671
// NOTE(review): the first line of this signature is missing from this
// listing; presumably CombinerHelper::matchOrShiftToFunnelShift(MachineInstr
// &MI, ...) -- confirm upstream.
//
// Matches (or (shl x, a), (lshr y, b)) where the two shift amounts are
// complementary with respect to the bit width, and sets MatchInfo to a
// callback that builds the equivalent G_FSHL / G_FSHR into MI's destination.
4673 bool AllowScalarConstants,
4674 BuildFnTy &MatchInfo) const {
4675 assert(MI.getOpcode() == TargetOpcode::G_OR);
4676
4677 Register Dst = MI.getOperand(0).getReg();
4678 LLT Ty = MRI.getType(Dst);
4679 unsigned BitWidth = Ty.getScalarSizeInBits();
4680
4681 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4682 unsigned FshOpc = 0;
4683
4684 // Match (or (shl ...), (lshr ...)).
4685 if (!mi_match(Dst, MRI,
4686 // m_GOr() handles the commuted version as well.
4687 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4688 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4689 return false;
4690
4691 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4692 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
// CstShlAmt stays 0 unless the shl amount matches as a constant or splat; the
// legality check below relies on that.
4693 int64_t CstShlAmt = 0, CstLShrAmt;
4694 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4695 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4696 CstShlAmt + CstLShrAmt == BitWidth) {
4697 FshOpc = TargetOpcode::G_FSHR;
4698 Amt = LShrAmt;
4699 } else if (mi_match(LShrAmt, MRI,
// NOTE(review): the pattern argument of this mi_match is not visible in this
// listing (line 4700 was dropped); per the comment below it presumably
// matches (sub bw, Amt) -- confirm upstream.
4701 ShlAmt == Amt) {
4702 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4703 FshOpc = TargetOpcode::G_FSHL;
4704 } else if (mi_match(ShlAmt, MRI,
// NOTE(review): the pattern argument of this mi_match is not visible in this
// listing (line 4705 was dropped); per the comment below it presumably
// matches (sub bw, Amt) -- confirm upstream.
4706 LShrAmt == Amt) {
4707 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4708 FshOpc = TargetOpcode::G_FSHR;
4709 } else {
4710 return false;
4711 }
4712
// An illegal funnel shift is still accepted when AllowScalarConstants is set,
// the type is scalar and the shl amount matched as a non-zero constant.
4713 LLT AmtTy = MRI.getType(Amt);
4714 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
4715 (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
4716 return false;
4717
4718 MatchInfo = [=](MachineIRBuilder &B) {
4719 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4720 };
4721 return true;
4722}
4723
4724/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::matchFunnelShiftToRotate(MachineInstr
// &MI) -- confirm upstream.
4726 unsigned Opc = MI.getOpcode();
4727 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4728 Register X = MI.getOperand(1).getReg();
4729 Register Y = MI.getOperand(2).getReg();
// Requires both value operands to be the same register.
4730 if (X != Y)
4731 return false;
// G_FSHL maps to G_ROTL and G_FSHR to G_ROTR.
4732 unsigned RotateOpc =
4733 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4734 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4735}
4736
// NOTE(review): the signature line of this definition is missing from this
// listing; presumably CombinerHelper::applyFunnelShiftToRotate(MachineInstr
// &MI) -- confirm upstream.
//
// Rewrites MI in place: the opcode becomes G_ROTL (for G_FSHL) or G_ROTR (for
// G_FSHR) and operand 2 is removed, which leaves the def, operand 1 and the
// former operand 3.
4738 unsigned Opc = MI.getOpcode();
4739 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4740 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4741 Observer.changingInstr(MI);
4742 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4743 : TargetOpcode::G_ROTR));
4744 MI.removeOperand(2);
4745 Observer.changedInstr(MI);
4746}
4747
4748// Fold (rot x, c) -> (rot x, c % BitSize)
// Match step: returns true when matchUnaryPredicate accepts the rotate amount
// (operand 2) and at least one ConstantInt passed to the callback is
// unsigned-greater-or-equal to the scalar bit width of the result type.
// NOTE(review): the declaration line (listing line 4749) is absent from this
// listing; the comments here describe only the visible body.
4750 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4751 MI.getOpcode() == TargetOpcode::G_ROTR);
4752 unsigned Bitsize =
4753 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4754 Register AmtReg = MI.getOperand(2).getReg();
4755 bool OutOfRange = false;
 // The callback never rejects a value; it only records whether any
 // ConstantInt it sees is >= Bitsize. Values that are not ConstantInt leave
 // the flag untouched.
4756 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4757 if (auto *CI = dyn_cast<ConstantInt>(C))
4758 OutOfRange |= CI->getValue().uge(Bitsize);
4759 return true;
4760 };
 // Presumably matchUnaryPredicate invokes the callback on the constant (or on
 // each element of a constant vector) defining AmtReg -- TODO confirm.
4761 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4762}
4763
// Replaces the rotate amount (operand 2) of a G_ROTL / G_ROTR with
// `amount urem Bitsize`, where Bitsize is the scalar bit width of the result.
// NOTE(review): the declaration line (listing line 4764) is absent from this
// listing; the comments here describe only the visible body.
4765 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4766 MI.getOpcode() == TargetOpcode::G_ROTR);
4767 unsigned Bitsize =
4768 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4769 Register Amt = MI.getOperand(2).getReg();
4770 LLT AmtTy = MRI.getType(Amt);
 // Build the bit-width constant and the G_UREM in the amount's own type.
4771 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4772 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
 // Only the amount operand is rewired; the rotate itself is kept.
4773 Observer.changingInstr(MI);
4774 MI.getOperand(2).setReg(Amt);
4775 Observer.changedInstr(MI);
4776}
4777
// Tries to decide the result of a G_ICMP from the known bits of its operands.
// On success MatchInfo receives 0 when the comparison is known false, or the
// value of the call expression below when it is known true.
// NOTE(review): the first signature line (listing line 4778) and the callee
// on listing line 4814 are absent from this listing.
4779 int64_t &MatchInfo) const {
4780 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4781 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4782
4783 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4784 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4785 // KnownBits on the LHS in two cases:
4786 //
4787 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4788 // we cannot do any transforms so we can safely bail out early.
4789 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4790 // >=0.
4791 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4792 if (KnownRHS.isUnknown())
4793 return false;
4794
4795 std::optional<bool> KnownVal;
4796 if (KnownRHS.isZero()) {
4797 // ? uge 0 -> always true
4798 // ? ult 0 -> always false
4799 if (Pred == CmpInst::ICMP_UGE)
4800 KnownVal = true;
4801 else if (Pred == CmpInst::ICMP_ULT)
4802 KnownVal = false;
4803 }
4804
 // Fall back to a full known-bits comparison only when the cheap zero-RHS
 // shortcut did not settle the result.
4805 if (!KnownVal) {
4806 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4807 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4808 }
4809
4810 if (!KnownVal)
4811 return false;
 // NOTE(review): the true-branch callee sits on the absent listing line 4814;
 // it is called with the result type's vector-ness and IsFP = false, and
 // presumably yields the target's "true" value for a comparison -- confirm.
4812 MatchInfo =
4813 *KnownVal
4815 /*IsVector = */
4816 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4817 /* IsFP = */ false)
4818 : 0;
4819 return true;
4820}
4821
// Matches an equality G_ICMP whose LHS is known to be 0 or 1 and whose RHS is
// the matching constant, so the compare can be replaced by the LHS itself
// (via COPY, G_TRUNC or G_ZEXT depending on the relative type sizes).
// NOTE(review): the first signature lines (listing lines 4822-4823) and the
// start of the condition on listing line 4842 are absent from this listing.
4824 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4825 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4826 // Given:
4827 //
4828 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4829 // %cmp = G_ICMP ne %x, 0
4830 //
4831 // Or:
4832 //
4833 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4834 // %cmp = G_ICMP eq %x, 1
4835 //
4836 // We can replace %cmp with %x assuming true is 1 on the target.
4837 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4838 if (!CmpInst::isEquality(Pred))
4839 return false;
4840 Register Dst = MI.getOperand(0).getReg();
4841 LLT DstTy = MRI.getType(Dst);
 // NOTE(review): the call whose result is compared against 1 begins on the
 // absent listing line 4842; per the comment above it presumably checks that
 // the target represents "true" as 1 -- confirm.
4843 /* IsFP = */ false) != 1)
4844 return false;
 // `eq` must compare against 1 and `ne` against 0 for the fold to hold.
4845 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4846 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4847 return false;
4848 Register LHS = MI.getOperand(2).getReg();
4849 auto KnownLHS = VT->getKnownBits(LHS);
 // Require the known range of the LHS to be exactly [0, 1].
4850 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4851 return false;
4852 // Make sure replacing Dst with the LHS is a legal operation.
4853 LLT LHSTy = MRI.getType(LHS);
4854 unsigned LHSSize = LHSTy.getSizeInBits();
4855 unsigned DstSize = DstTy.getSizeInBits();
 // Same size -> COPY, narrower destination -> G_TRUNC, wider -> G_ZEXT.
4856 unsigned Op = TargetOpcode::COPY;
4857 if (DstSize != LHSSize)
4858 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4859 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4860 return false;
4861 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4862 return true;
4863}
4864
4865// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
// Scalar-only. On success MatchInfo holds a callback that rewrites the G_AND
// in place so that it reads x directly instead of the G_OR result.
// NOTE(review): the first signature lines (listing lines 4866-4867) are
// absent from this listing.
4868 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4869 assert(MI.getOpcode() == TargetOpcode::G_AND);
4870
4871 // Ignore vector types to simplify matching the two constants.
4872 // TODO: do this for vectors and scalars via a demanded bits analysis.
4873 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4874 if (Ty.isVector())
4875 return false;
4876
4877 Register Src;
4878 Register AndMaskReg;
4879 int64_t AndMaskBits;
4880 int64_t OrMaskBits;
 // m_all_of binds both the constant value and the register that holds it, so
 // the register can be reused when the operands are rewritten.
4881 if (!mi_match(MI, MRI,
4882 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4883 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4884 return false;
4885
4886 // Check if OrMask could turn on any bits in Src.
4887 if (AndMaskBits & OrMaskBits)
4888 return false;
4889
 // MI is captured by reference: the callback mutates the matched instruction
 // itself rather than building a replacement.
4890 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4891 Observer.changingInstr(MI);
4892 // Canonicalize the result to have the constant on the RHS.
4893 if (MI.getOperand(1).getReg() == AndMaskReg)
4894 MI.getOperand(2).setReg(AndMaskReg);
4895 MI.getOperand(1).setReg(Src);
4896 Observer.changedInstr(MI);
4897 };
4898 return true;
4899}
4900
4901/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
/// The shift must be a single-use G_ASHR or G_LSHR by a constant, and the
/// shifted-out position plus the extension width must fit in the scalar size.
// NOTE(review): the signature lines (listing lines 4902-4903) and the
// definition of ExtractTy (listing line 4909) are absent from this listing.
4904 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4905 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4906 Register Dst = MI.getOperand(0).getReg();
4907 Register Src = MI.getOperand(1).getReg();
4908 LLT Ty = MRI.getType(Src);
 // Without legalizer info the combine is rejected outright.
4910 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4911 return false;
4912 int64_t Width = MI.getOperand(2).getImm();
4913 Register ShiftSrc;
4914 int64_t ShiftImm;
4915 if (!mi_match(
4916 Src, MRI,
4917 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4918 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4919 return false;
 // The extracted field [ShiftImm, ShiftImm + Width) must lie inside the value.
4920 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4921 return false;
4922
 // Emits G_SBFX Dst, ShiftSrc, ShiftImm (position), Width.
4923 MatchInfo = [=](MachineIRBuilder &B) {
4924 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4925 auto Cst2 = B.buildConstant(ExtractTy, Width);
4926 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4927 };
4928 return true;
4929}
4930
4931/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
/// The mask must be a contiguous run of low bits and the shift amount must be
/// smaller than the scalar bit width.
// NOTE(review): the first signature line (listing line 4932) and the
// definition of ExtractTy (listing line 4937) are absent from this listing.
4933 BuildFnTy &MatchInfo) const {
4934 GAnd *And = cast<GAnd>(&MI);
4935 Register Dst = And->getReg(0);
4936 LLT Ty = MRI.getType(Dst);
4938 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4939 // into account.
 // Unlike the G_SBFX matcher, a null LI does not reject the combine here.
4940 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4941 return false;
4942
4943 int64_t AndImm, LSBImm;
4944 Register ShiftSrc;
4945 const unsigned Size = Ty.getScalarSizeInBits();
4946 if (!mi_match(And->getReg(0), MRI,
4947 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4948 m_ICst(AndImm))))
4949 return false;
4950
4951 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4952 auto MaybeMask = static_cast<uint64_t>(AndImm);
4953 if (MaybeMask & (MaybeMask + 1))
4954 return false;
4955
4956 // LSB must fit within the register.
 // The unsigned cast also rejects negative shift amounts.
4957 if (static_cast<uint64_t>(LSBImm) >= Size)
4958 return false;
4959
 // Field width = number of trailing one bits of the mask, at Size bits.
4960 uint64_t Width = APInt(Size, AndImm).countr_one();
4961 MatchInfo = [=](MachineIRBuilder &B) {
4962 auto WidthCst = B.buildConstant(ExtractTy, Width);
4963 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4964 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4965 };
4966 return true;
4967}
4968
// Matches `shr (shl x, c1), c2` (G_ASHR or G_LSHR) and turns it into a
// bitfield extract: G_SBFX for the arithmetic shift, G_UBFX for the logical.
// NOTE(review): the first signature lines (listing lines 4969-4970) and the
// definition of ExtractTy (listing line 4983) are absent from this listing.
4971 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4972 const unsigned Opcode = MI.getOpcode();
4973 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4974
4975 const Register Dst = MI.getOperand(0).getReg();
4976
4977 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4978 ? TargetOpcode::G_SBFX
4979 : TargetOpcode::G_UBFX;
4980
4981 // Check if the type we would use for the extract is legal
4982 LLT Ty = MRI.getType(Dst);
4984 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4985 return false;
4986
4987 Register ShlSrc;
4988 int64_t ShrAmt;
4989 int64_t ShlAmt;
4990 const unsigned Size = Ty.getScalarSizeInBits();
4991
4992 // Try to match shr (shl x, c1), c2
4993 if (!mi_match(Dst, MRI,
4994 m_BinOp(Opcode,
4995 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4996 m_ICst(ShrAmt))))
4997 return false;
4998
4999 // Make sure that the shift sizes can fit a bitfield extract
 // Requires 0 <= ShlAmt <= ShrAmt < Size.
5000 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
5001 return false;
5002
5003 // Skip this combine if the G_SEXT_INREG combine could handle it
5004 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
5005 return false;
5006
5007 // Calculate start position and width of the extract
5008 const int64_t Pos = ShrAmt - ShlAmt;
5009 const int64_t Width = Size - ShrAmt;
5010
5011 MatchInfo = [=](MachineIRBuilder &B) {
5012 auto WidthCst = B.buildConstant(ExtractTy, Width);
5013 auto PosCst = B.buildConstant(ExtractTy, Pos);
5014 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
5015 };
5016 return true;
5017}
5018
// Matches `shr (and x, c1), c2` (G_LSHR or G_ASHR) and turns it into either a
// constant 0 (when the shift discards every masked bit) or a G_UBFX.
// NOTE(review): the first signature lines (listing lines 5019-5020), the
// definition of ExtractTy (listing line 5027) and one statement on listing
// line 5056 are absent from this listing.
5021 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5022 const unsigned Opcode = MI.getOpcode();
5023 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
5024
5025 const Register Dst = MI.getOperand(0).getReg();
5026 LLT Ty = MRI.getType(Dst);
5028 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
5029 return false;
5030
5031 // Try to match shr (and x, c1), c2
5032 Register AndSrc;
5033 int64_t ShrAmt;
5034 int64_t SMask;
5035 if (!mi_match(Dst, MRI,
5036 m_BinOp(Opcode,
5037 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
5038 m_ICst(ShrAmt))))
5039 return false;
5040
5041 const unsigned Size = Ty.getScalarSizeInBits();
5042 if (ShrAmt < 0 || ShrAmt >= Size)
5043 return false;
5044
5045 // If the shift subsumes the mask, emit the 0 directly.
5046 if (0 == (SMask >> ShrAmt)) {
5047 MatchInfo = [=](MachineIRBuilder &B) {
5048 B.buildConstant(Dst, 0);
5049 };
5050 return true;
5051 }
5052
5053 // Check that ubfx can do the extraction, with no holes in the mask.
 // Bits below ShrAmt are shifted out anyway, so they are filled in before
 // testing that the mask is one contiguous run of low bits.
5054 uint64_t UMask = SMask;
5055 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
 // NOTE(review): a statement on listing line 5056 is absent here; it may
 // further adjust UMask before the isMask_64 test -- confirm.
5057 if (!isMask_64(UMask))
5058 return false;
5059
5060 // Calculate start position and width of the extract.
5061 const int64_t Pos = ShrAmt;
5062 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
5063
5064 // It's preferable to keep the shift, rather than form G_SBFX.
5065 // TODO: remove the G_AND via demanded bits analysis.
5066 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
5067 return false;
5068
5069 MatchInfo = [=](MachineIRBuilder &B) {
5070 auto WidthCst = B.buildConstant(ExtractTy, Width);
5071 auto PosCst = B.buildConstant(ExtractTy, Pos);
5072 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
5073 };
5074 return true;
5075}
5076
5077bool CombinerHelper::reassociationCanBreakAddressingModePattern(
5078 MachineInstr &MI) const {
5079 auto &PtrAdd = cast<GPtrAdd>(MI);
5080
5081 Register Src1Reg = PtrAdd.getBaseReg();
5082 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
5083 if (!Src1Def)
5084 return false;
5085
5086 Register Src2Reg = PtrAdd.getOffsetReg();
5087
5088 if (MRI.hasOneNonDBGUse(Src1Reg))
5089 return false;
5090
5091 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
5092 if (!C1)
5093 return false;
5094 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5095 if (!C2)
5096 return false;
5097
5098 const APInt &C1APIntVal = *C1;
5099 const APInt &C2APIntVal = *C2;
5100 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
5101
5102 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
5103 // This combine may end up running before ptrtoint/inttoptr combines
5104 // manage to eliminate redundant conversions, so try to look through them.
5105 MachineInstr *ConvUseMI = &UseMI;
5106 unsigned ConvUseOpc = ConvUseMI->getOpcode();
5107 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
5108 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
5109 Register DefReg = ConvUseMI->getOperand(0).getReg();
5110 if (!MRI.hasOneNonDBGUse(DefReg))
5111 break;
5112 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
5113 ConvUseOpc = ConvUseMI->getOpcode();
5114 }
5115 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
5116 if (!LdStMI)
5117 continue;
5118 // Is x[offset2] already not a legal addressing mode? If so then
5119 // reassociating the constants breaks nothing (we test offset2 because
5120 // that's the one we hope to fold into the load or store).
5121 TargetLoweringBase::AddrMode AM;
5122 AM.HasBaseReg = true;
5123 AM.BaseOffs = C2APIntVal.getSExtValue();
5124 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
5125 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
5126 PtrAdd.getMF()->getFunction().getContext());
5127 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
5128 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5129 AccessTy, AS))
5130 continue;
5131
5132 // Would x[offset1+offset2] still be a legal addressing mode?
5133 AM.BaseOffs = CombinedValue;
5134 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5135 AccessTy, AS))
5136 return true;
5137 }
5138
5139 return false;
5140}
5141
// Matches G_PTR_ADD(BASE, G_ADD(X, C)) and prepares a rewrite to
// G_PTR_ADD(G_PTR_ADD(BASE, X), C). Returns false if RHS is not a G_ADD with
// a constant second operand, or if the rewrite could break an addressing
// mode according to reassociationCanBreakAddressingModePattern.
// NOTE(review): the first signature line (listing line 5142) and the
// statements guarded by the three flag `if`s (listing lines 5166, 5168, 5170)
// are absent from this listing.
5143 MachineInstr *RHS,
5144 BuildFnTy &MatchInfo) const {
5145 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5146 Register Src1Reg = MI.getOperand(1).getReg();
5147 if (RHS->getOpcode() != TargetOpcode::G_ADD)
5148 return false;
5149 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
5150 if (!C2)
5151 return false;
5152
5153 // If both additions are nuw, the reassociated additions are also nuw.
5154 // If the original G_PTR_ADD is additionally nusw, X and C are both not
5155 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
5156 // therefore also nusw.
5157 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
5158 // the new G_PTR_ADDs are then also inbounds.
5159 unsigned PtrAddFlags = MI.getFlags();
5160 unsigned AddFlags = RHS->getFlags();
5161 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
5162 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
5163 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
 // NOTE(review): the bodies of the following `if`s are on absent listing
 // lines; presumably each ORs the corresponding MIFlag into Flags -- confirm.
5164 unsigned Flags = 0;
5165 if (IsNoUWrap)
5167 if (IsNoUSWrap)
5169 if (IsInBounds)
5171
 // The callback builds the new inner G_PTR_ADD(BASE, X) and then rewrites MI
 // in place to add the constant to it. It uses the member Builder, not the
 // builder argument B.
5172 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5173 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
5174
5175 auto NewBase =
5176 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
5177 Observer.changingInstr(MI);
5178 MI.getOperand(1).setReg(NewBase.getReg(0));
5179 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
5180 MI.setFlags(Flags);
5181 Observer.changedInstr(MI);
5182 };
 // MatchInfo is populated unconditionally; the return value decides whether
 // the caller may use it.
5183 return !reassociationCanBreakAddressingModePattern(MI);
5184}
5185
// Matches G_PTR_ADD(G_PTR_ADD(X, C), Y) where the inner G_PTR_ADD has one
// non-debug use, and prepares a rewrite to G_PTR_ADD(G_PTR_ADD(X, Y), C).
// Returns false if the pattern does not match or if the rewrite could break
// an addressing mode.
// NOTE(review): the first signature line (listing line 5186), the flag masks
// on listing lines 5207 and 5209, and the statements guarded by the three
// flag `if`s are absent from this listing.
5187 MachineInstr *LHS,
5188 MachineInstr *RHS,
5189 BuildFnTy &MatchInfo) const {
5190 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
5191 // if and only if (G_PTR_ADD X, C) has one use.
5192 Register LHSBase;
5193 std::optional<ValueAndVReg> LHSCstOff;
5194 if (!mi_match(MI.getBaseReg(), MRI,
5195 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
5196 return false;
5197
 // The successful match above implies the base is defined by a G_PTR_ADD;
 // LHS is expected to be that defining instruction.
5198 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5199
5200 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5201 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5202 // so the new G_PTR_ADDs are also inbounds.
5203 unsigned PtrAddFlags = MI.getFlags();
5204 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5205 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
 // NOTE(review): the masks that complete the next two expressions are on
 // absent listing lines; by name they are presumably NoUSWrap and InBounds.
5206 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5208 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5210 unsigned Flags = 0;
5211 if (IsNoUWrap)
5213 if (IsNoUSWrap)
5215 if (IsInBounds)
5217
5218 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5219 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5220 // before its def. Sink the instruction to the outer PTR_ADD to ensure this
5221 // doesn't happen.
5222 LHSPtrAdd->moveBefore(&MI);
5223 Register RHSReg = MI.getOffsetReg();
5224 // set VReg will cause type mismatch if it comes from extend/trunc
 // A fresh constant is therefore built in the type of the outer offset.
5225 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
 // Swap the offsets: the outer G_PTR_ADD gets the constant, the inner one
 // gets the former outer offset.
5226 Observer.changingInstr(MI);
5227 MI.getOperand(2).setReg(NewCst.getReg(0));
5228 MI.setFlags(Flags);
5229 Observer.changedInstr(MI);
5230 Observer.changingInstr(*LHSPtrAdd);
5231 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5232 LHSPtrAdd->setFlags(Flags);
5233 Observer.changedInstr(*LHSPtrAdd);
5234 };
5235 return !reassociationCanBreakAddressingModePattern(MI);
5236}
5237
// Matches G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) and prepares a rewrite to
// G_PTR_ADD(BASE, C1+C2). Returns false if LHS is not a G_PTR_ADD, if either
// offset is not a constant, or if the rewrite could break an addressing mode.
// NOTE(review): the first signature line (listing line 5238) and the
// statements guarded by the flag `if`s (listing lines 5268, 5270-5271) are
// absent from this listing.
5239 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
5240 BuildFnTy &MatchInfo) const {
5241 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5242 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5243 if (!LHSPtrAdd)
5244 return false;
5245
5246 Register Src2Reg = MI.getOperand(2).getReg();
5247 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5248 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5249 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5250 if (!C1)
5251 return false;
5252 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5253 if (!C2)
5254 return false;
5255
5256 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5257 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5258 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5259 // largest signed integer that fits into the index type, which is the maximum
5260 // size of allocated objects according to the IR Language Reference.
5261 unsigned PtrAddFlags = MI.getFlags();
5262 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5263 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5264 bool IsInBounds =
5265 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
 // NOTE(review): the statements guarded by the `if`s below are on absent
 // listing lines; presumably they OR the matching flags into Flags.
5266 unsigned Flags = 0;
5267 if (IsNoUWrap)
5269 if (IsInBounds) {
5272 }
5273
 // The callback rewrites MI in place: base becomes BASE, offset becomes a
 // new constant C1+C2 built in the type of the original outer offset.
5274 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5275 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5276 Observer.changingInstr(MI);
5277 MI.getOperand(1).setReg(LHSSrc1);
5278 MI.getOperand(2).setReg(NewCst.getReg(0));
5279 MI.setFlags(Flags);
5280 Observer.changedInstr(MI);
5281 };
5282 return !reassociationCanBreakAddressingModePattern(MI);
5283}
5284
// Entry point for the G_PTR_ADD reassociation combines. Tries the three
// sub-matchers in a fixed order (constant folding, then inner-LHS constant,
// then inner-RHS constant) and returns true on the first that succeeds.
// NOTE(review): the first signature line (listing line 5285) is absent from
// this listing.
5286 BuildFnTy &MatchInfo) const {
5287 auto &PtrAdd = cast<GPtrAdd>(MI);
5288 // We're trying to match a few pointer computation patterns here for
5289 // re-association opportunities.
5290 // 1) Isolating a constant operand to be on the RHS, e.g.:
5291 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5292 //
5293 // 2) Folding two constants in each sub-tree as long as such folding
5294 // doesn't break a legal addressing mode.
5295 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5296 //
5297 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5298 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
5299 // iff (G_PTR_ADD X, C) has one use.
5300 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5301 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5302
5303 // Try to match example 2.
5304 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5305 return true;
5306
5307 // Try to match example 3.
5308 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5309 return true;
5310
5311 // Try to match example 1.
5312 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5313 return true;
5314
5315 return false;
5316}
// Tries to reassociate `Opc(OpLHS, OpRHS)` when OpLHS is itself defined by an
// `Opc` whose second operand is a constant (or constant splat) and whose
// first operand is not. On success MatchInfo builds the replacement into
// DstReg.
// NOTE(review): the first signature line (listing line 5317) is absent from
// this listing; Opc and DstReg are presumably declared there.
5318 Register OpLHS, Register OpRHS,
5319 BuildFnTy &MatchInfo) const {
5320 LLT OpRHSTy = MRI.getType(OpRHS);
 // The results of getVRegDef are dereferenced without a null check here and
 // below.
5321 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5322
5323 if (OpLHSDef->getOpcode() != Opc)
5324 return false;
5325
5326 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5327 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5328 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5329
5330 // If the inner op is (X op C), pull the constant out so it can be folded with
5331 // other constants in the expression tree. Folding is not guaranteed so we
5332 // might have (C1 op C2). In that case do not pull a constant out because it
5333 // won't help and can lead to infinite loops.
5334 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5335 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5336 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5337 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5338 MatchInfo = [=](MachineIRBuilder &B) {
5339 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5340 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5341 };
5342 return true;
5343 }
 // NOTE(review): the "one use" condition mentioned below is not checked in
 // this function; presumably isReassocProfitable accounts for it -- confirm.
5344 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5345 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5346 // iff (op x, c1) has one use
5347 MatchInfo = [=](MachineIRBuilder &B) {
5348 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5349 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5350 };
5351 return true;
5352 }
5353 }
5354
5355 return false;
5356}
5357
// Tries tryReassocBinOp on a two-operand instruction with its operands in
// both orders (LHS/RHS, then RHS/LHS) and returns true if either succeeds.
// NOTE(review): the first signature line (listing line 5358) is absent from
// this listing.
5359 BuildFnTy &MatchInfo) const {
5360 // We don't check if the reassociation will break a legal addressing mode
5361 // here since pointer arithmetic is handled by G_PTR_ADD.
5362 unsigned Opc = MI.getOpcode();
5363 Register DstReg = MI.getOperand(0).getReg();
5364 Register LHSReg = MI.getOperand(1).getReg();
5365 Register RHSReg = MI.getOperand(2).getReg();
5366
5367 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5368 return true;
 // Second attempt with the operands swapped.
5369 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5370 return true;
5371 return false;
5372}
5373
// Attempts to constant-fold a cast via ConstantFoldCastOp. On success the
// folded value is stored in MatchInfo and true is returned.
// NOTE(review): the first signature line (listing line 5374) is absent from
// this listing.
5375 APInt &MatchInfo) const {
5376 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5377 Register SrcOp = MI.getOperand(1).getReg();
5378
5379 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5380 MatchInfo = *MaybeCst;
5381 return true;
5382 }
5383
5384 return false;
5385}
5386
// Attempts to constant-fold a unary integer operation via
// ConstantFoldUnaryIntOp. An empty result means no fold. Otherwise MatchInfo
// builds either a single constant or a build-vector of constants into Dst.
// NOTE(review): the first signature line (listing line 5387) is absent from
// this listing.
5388 BuildFnTy &MatchInfo) const {
5389 Register Dst = MI.getOperand(0).getReg();
5390 auto Csts = ConstantFoldUnaryIntOp(MI.getOpcode(), MRI.getType(Dst),
5391 MI.getOperand(1).getReg(), MRI);
5392 if (Csts.empty())
5393 return false;
5394
 // The folded values are moved into the closure so it owns them.
5395 MatchInfo = [Dst, Csts = std::move(Csts)](MachineIRBuilder &B) {
5396 if (Csts.size() == 1)
5397 B.buildConstant(Dst, Csts[0]);
5398 else
5399 B.buildBuildVectorConstant(Dst, Csts);
5400 };
5401 return true;
5402}
5403
// Attempts to constant-fold a two-operand integer instruction via
// ConstantFoldBinOp. On success the folded value is stored in MatchInfo.
// NOTE(review): the first signature line (listing line 5404) is absent from
// this listing.
5405 APInt &MatchInfo) const {
5406 Register Op1 = MI.getOperand(1).getReg();
5407 Register Op2 = MI.getOperand(2).getReg();
5408 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5409 if (!MaybeCst)
5410 return false;
5411 MatchInfo = *MaybeCst;
5412 return true;
5413}
5414
// Attempts to constant-fold a two-operand floating-point instruction via
// ConstantFoldFPBinOp. On success MatchInfo is set to a ConstantFP created in
// the function's LLVMContext.
// NOTE(review): the first signature line (listing line 5415) is absent from
// this listing.
5416 ConstantFP *&MatchInfo) const {
5417 Register Op1 = MI.getOperand(1).getReg();
5418 Register Op2 = MI.getOperand(2).getReg();
5419 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5420 if (!MaybeCst)
5421 return false;
5422 MatchInfo =
5423 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5424 return true;
5425}
5426
// Constant-folds a G_FMA / G_FMAD whose three source operands are all
// floating-point constants. On success MatchInfo is set to a ConstantFP
// holding the fused multiply-add result.
// NOTE(review): the first signature line (listing line 5427) and the final
// argument of fusedMultiplyAdd (listing line 5447) are absent from this
// listing.
5428 ConstantFP *&MatchInfo) const {
5429 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5430 MI.getOpcode() == TargetOpcode::G_FMAD);
5431 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5432
 // Each source operand must resolve to an FP constant.
5433 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5434 if (!Op3Cst)
5435 return false;
5436
5437 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5438 if (!Op2Cst)
5439 return false;
5440
5441 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5442 if (!Op1Cst)
5443 return false;
5444
 // Op1F is a copy that fusedMultiplyAdd updates in place.
 // NOTE(review): the remaining argument on the absent line is presumably the
 // rounding mode -- confirm.
5445 APFloat Op1F = Op1Cst->getValueAPF();
5446 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5448 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5449 return true;
5450}
5451
// Matches a scalar G_AND with a low-bit mask whose other input is a
// single-use G_ADD/G_SUB/G_MUL/G_AND/G_OR/G_XOR, and prepares a rewrite that
// performs that binop at the mask's width and zero-extends the result.
// NOTE(review): the first signature lines (listing lines 5452-5453) are
// absent from this listing.
5454 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5455 // Look for a binop feeding into an AND with a mask:
5456 //
5457 // %add = G_ADD %lhs, %rhs
5458 // %and = G_AND %add, 000...11111111
5459 //
5460 // Check if it's possible to perform the binop at a narrower width and zext
5461 // back to the original width like so:
5462 //
5463 // %narrow_lhs = G_TRUNC %lhs
5464 // %narrow_rhs = G_TRUNC %rhs
5465 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5466 // %new_add = G_ZEXT %narrow_add
5467 // %and = G_AND %new_add, 000...11111111
5468 //
5469 // This can allow later combines to eliminate the G_AND if it turns out
5470 // that the mask is irrelevant.
5471 assert(MI.getOpcode() == TargetOpcode::G_AND);
5472 Register Dst = MI.getOperand(0).getReg();
5473 Register AndLHS = MI.getOperand(1).getReg();
5474 Register AndRHS = MI.getOperand(2).getReg();
5475 LLT WideTy = MRI.getType(Dst);
5476
5477 // If the potential binop has more than one use, then it's possible that one
5478 // of those uses will need its full width.
 // Vector types are rejected here as well.
5479 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5480 return false;
5481
5482 // Check if the LHS feeding the AND is impacted by the high bits that we're
5483 // masking out.
5484 //
5485 // e.g. for 64-bit x, y:
5486 //
5487 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5488 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5489 if (!LHSInst)
5490 return false;
5491 unsigned LHSOpc = LHSInst->getOpcode();
 // Only the opcodes listed below are accepted.
5492 switch (LHSOpc) {
5493 default:
5494 return false;
5495 case TargetOpcode::G_ADD:
5496 case TargetOpcode::G_SUB:
5497 case TargetOpcode::G_MUL:
5498 case TargetOpcode::G_AND:
5499 case TargetOpcode::G_OR:
5500 case TargetOpcode::G_XOR:
5501 break;
5502 }
5503
5504 // Find the mask on the RHS.
5505 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5506 if (!Cst)
5507 return false;
5508 auto Mask = Cst->Value;
5509 if (!Mask.isMask())
5510 return false;
5511
5512 // No point in combining if there's nothing to truncate.
5513 unsigned NarrowWidth = Mask.countr_one();
5514 if (NarrowWidth == WideTy.getSizeInBits())
5515 return false;
5516 LLT NarrowTy = LLT::integer(NarrowWidth);
5517
5518 // Check if adding the zext + truncates could be harmful.
5519 auto &MF = *MI.getMF();
5520 const auto &TLI = getTargetLowering();
5521 LLVMContext &Ctx = MF.getFunction().getContext();
5522 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5523 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5524 return false;
5525 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5526 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5527 return false;
5528 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5529 Register BinOpRHS = LHSInst->getOperand(2).getReg();
 // The callback uses the member Builder rather than its argument B, and only
 // rewires operand 1 of the G_AND; the original wide binop is left in place.
5530 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5531 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5532 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5533 auto NarrowBinOp =
5534 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5535 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5536 Observer.changingInstr(MI);
5537 MI.getOperand(1).setReg(Ext.getReg(0));
5538 Observer.changedInstr(MI);
5539 };
5540 return true;
5541}
5542
// Matches G_UMULO / G_SMULO whose operand 3 is the constant (or splat) 2 and
// prepares an in-place rewrite to G_UADDO / G_SADDO of operand 2 with itself.
// NOTE(review): the first signature line (listing line 5543) is absent from
// this listing.
5544 BuildFnTy &MatchInfo) const {
5545 unsigned Opc = MI.getOpcode();
5546 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5547
5548 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5549 return false;
5550
5551 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5552 Observer.changingInstr(MI);
5553 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5554 : TargetOpcode::G_SADDO;
5555 MI.setDesc(Builder.getTII().get(NewOpc));
 // x * 2 becomes x + x: the constant operand is replaced by operand 2.
5556 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5557 Observer.changedInstr(MI);
5558 };
5559 return true;
5560}
5561
// Matches G_UMULO / G_SMULO whose operand 3 is the constant (or splat) 0 and
// prepares a replacement that defines both results (operands 0 and 1) as the
// constant 0.
// NOTE(review): the first signature line (listing line 5562) is absent from
// this listing.
5563 BuildFnTy &MatchInfo) const {
5564 // (G_*MULO x, 0) -> 0 + no carry out
5565 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5566 MI.getOpcode() == TargetOpcode::G_SMULO);
5567 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5568 return false;
5569 Register Dst = MI.getOperand(0).getReg();
5570 Register Carry = MI.getOperand(1).getReg();
 // Both result types must be able to hold a constant at this stage.
5571 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5572 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5573 return false;
5574 MatchInfo = [=](MachineIRBuilder &B) {
5575 B.buildConstant(Dst, 0);
5576 B.buildConstant(Carry, 0);
5577 };
5578 return true;
5579}
5580
// Matches G_UADDE / G_SADDE / G_USUBE / G_SSUBE whose operand 4 is the
// constant (or splat) 0 and prepares an in-place rewrite to the corresponding
// G_*ADDO / G_*SUBO with operand 4 removed.
// NOTE(review): the first signature line (listing line 5581) is absent from
// this listing.
5582 BuildFnTy &MatchInfo) const {
5583 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5584 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5585 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5586 MI.getOpcode() == TargetOpcode::G_SADDE ||
5587 MI.getOpcode() == TargetOpcode::G_USUBE ||
5588 MI.getOpcode() == TargetOpcode::G_SSUBE);
5589 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5590 return false;
 // NOTE(review): this closure captures everything by reference ([&]), unlike
 // the neighbouring matchers which use [=, &MI]; it is only valid while MI
 // and this helper are alive.
5591 MatchInfo = [&](MachineIRBuilder &B) {
 // NewOpcode has no initialiser and the switch has no default; the assert
 // above restricts the opcode to the four handled cases.
5592 unsigned NewOpcode;
5593 switch (MI.getOpcode()) {
5594 case TargetOpcode::G_UADDE:
5595 NewOpcode = TargetOpcode::G_UADDO;
5596 break;
5597 case TargetOpcode::G_SADDE:
5598 NewOpcode = TargetOpcode::G_SADDO;
5599 break;
5600 case TargetOpcode::G_USUBE:
5601 NewOpcode = TargetOpcode::G_USUBO;
5602 break;
5603 case TargetOpcode::G_SSUBE:
5604 NewOpcode = TargetOpcode::G_SSUBO;
5605 break;
5606 }
5607 Observer.changingInstr(MI);
5608 MI.setDesc(B.getTII().get(NewOpcode));
5609 MI.removeOperand(4);
5610 Observer.changedInstr(MI);
5611 };
5612 return true;
5613}
5614
// Matches a G_SUB where one side is a G_ADD that shares an operand with the
// other side, so the subtraction cancels:
//   (x + y) - z  ->  copy of the surviving addend
//   x - (y + z)  ->  0 - the surviving addend
// NOTE(review): the first signature line (listing line 5615) and the second
// halves of four conditions (listing lines 5626, 5629, 5643, 5646) are absent
// from this listing. The visible halves match constants/splats, so the
// missing parts presumably compare constant values -- confirm.
5616 BuildFnTy &MatchInfo) const {
5617 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5618 Register Dst = MI.getOperand(0).getReg();
5619 // (x + y) - z -> x (if y == z)
5620 // (x + y) - z -> y (if x == z)
5621 Register X, Y, Z;
5622 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
 // ReplaceReg stays default-constructed (tested as false below) when
 // neither cancellation applies.
5623 Register ReplaceReg;
5624 int64_t CstX, CstY;
5625 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5627 ReplaceReg = X;
5628 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5630 ReplaceReg = Y;
5631 if (ReplaceReg) {
5632 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5633 return true;
5634 }
5635 }
5636
5637 // x - (y + z) -> 0 - y (if x == z)
5638 // x - (y + z) -> 0 - z (if x == y)
5639 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5640 Register ReplaceReg;
5641 int64_t CstX;
5642 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5644 ReplaceReg = Y;
5645 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5647 ReplaceReg = Z;
5648 if (ReplaceReg) {
 // The negation is emitted as a subtraction from a zero constant of the
 // destination type.
5649 MatchInfo = [=](MachineIRBuilder &B) {
5650 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5651 B.buildSub(Dst, Zero, ReplaceReg);
5652 };
5653 return true;
5654 }
5655 }
5656 return false;
5657}
5658
// Builds a multiply/shift replacement for a G_UDIV or G_UREM and returns the
// last instruction of the emitted sequence. LHS is operand 1 and RHS (the
// divisor) is operand 2; every divisor element is cast to ConstantInt below.
//
// When MI carries the IsExact flag the result is
//   (LHS >>exact tz) * inverse(divisor >> tz),  tz = trailing zeros of divisor.
// Otherwise the sequence is: optional pre-shift, G_UMULH by a magic value,
// optional "NPQ" fix-up, post-shift, and a select that yields LHS where the
// divisor equals 1. For G_UREM the remainder is then formed as LHS - Q * RHS.
//
// NOTE(review): the declaration line of this function and a few statements
// are not present in this listing (see the notes below).
5660 unsigned Opcode = MI.getOpcode();
5661 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5662 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5663 Register Dst = UDivorRem.getReg(0);
5664 Register LHS = UDivorRem.getReg(1);
5665 Register RHS = UDivorRem.getReg(2);
5666 LLT Ty = MRI.getType(Dst);
5667 LLT ScalarTy = Ty.getScalarType();
5668 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
     // NOTE(review): ShiftAmtTy is defined on a line missing from this listing.
5670 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5671
5672 auto &MIB = Builder;
5673
5674 bool UseSRL = false;
5675 SmallVector<Register, 16> Shifts, Factors;
5676 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5677 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5678
     // Per-element callback for the exact path: records the number of trailing
     // zero bits of the divisor as a shift amount and the multiplicative
     // inverse of the divisor with those bits shifted out as the factor.
5679 auto BuildExactUDIVPattern = [&](const Constant *C) {
5680 // Don't recompute inverses for each splat element.
5681 if (IsSplat && !Factors.empty()) {
5682 Shifts.push_back(Shifts[0]);
5683 Factors.push_back(Factors[0]);
5684 return true;
5685 }
5686
5687 auto *CI = cast<ConstantInt>(C);
5688 APInt Divisor = CI->getValue();
5689 unsigned Shift = Divisor.countr_zero();
5690 if (Shift) {
5691 Divisor.lshrInPlace(Shift);
     // At least one element needs a shift, so the G_LSHR must be emitted.
5692 UseSRL = true;
5693 }
5694
5695 // Calculate the multiplicative inverse modulo BW.
5696 APInt Factor = Divisor.multiplicativeInverse();
5697 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5698 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5699 return true;
5700 };
5701
5702 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5703 // Collect all magic values from the build vector.
5704 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5705 llvm_unreachable("Expected unary predicate match to succeed");
5706
5707 Register Shift, Factor;
5708 if (Ty.isVector()) {
5709 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5710 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5711 } else {
5712 Shift = Shifts[0];
5713 Factor = Factors[0];
5714 }
5715
5716 Register Res = LHS;
5717
5718 if (UseSRL)
5719 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5720
5721 return MIB.buildMul(Ty, Res, Factor);
5722 }
5723
     // Known leading zeros of the dividend; 0 when no value tracking (VT) is
     // available.
5724 unsigned KnownLeadingZeros =
5725 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5726
5727 bool UseNPQ = false;
5728 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
     // Per-element callback for the general path: records the pre-shift, the
     // magic multiplier, the NPQ factor and the post-shift for one divisor.
5729 auto BuildUDIVPattern = [&](const Constant *C) {
5730 auto *CI = cast<ConstantInt>(C);
5731 const APInt &Divisor = CI->getValue();
5732
5733 bool SelNPQ = false;
5734 APInt Magic(Divisor.getBitWidth(), 0);
5735 unsigned PreShift = 0, PostShift = 0;
5736
5737 // Magic algorithm doesn't work for division by 1. We need to emit a select
5738 // at the end.
5739 // TODO: Use undef values for divisor of 1.
5740 if (!Divisor.isOne()) {
5741
5742 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5743 // in the dividend exceeds the leading zeros for the divisor.
     // NOTE(review): the statement that declares `magics` starts on lines
     // missing from this listing; only its argument list is visible below.
5746 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5747
5748 Magic = std::move(magics.Magic);
5749
5750 assert(magics.PreShift < Divisor.getBitWidth() &&
5751 "We shouldn't generate an undefined shift!");
5752 assert(magics.PostShift < Divisor.getBitWidth() &&
5753 "We shouldn't generate an undefined shift!");
5754 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5755 PreShift = magics.PreShift;
5756 PostShift = magics.PostShift;
5757 SelNPQ = magics.IsAdd;
5758 }
5759
5760 PreShifts.push_back(
5761 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5762 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
     // The NPQ factor has only the top bit set when the fix-up applies to this
     // element and is zero otherwise (see the G_UMULH use further down).
5763 NPQFactors.push_back(
5764 MIB.buildConstant(ScalarTy,
5765 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5766 : APInt::getZero(EltBits))
5767 .getReg(0));
5768 PostShifts.push_back(
5769 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5770 UseNPQ |= SelNPQ;
5771 return true;
5772 };
5773
5774 // Collect the shifts/magic values from each element.
5775 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5776 (void)Matched;
5777 assert(Matched && "Expected unary predicate match to succeed");
5778
5779 Register PreShift, PostShift, MagicFactor, NPQFactor;
5780 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5781 if (RHSDef) {
5782 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5783 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5784 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5785 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5786 } else {
5787 assert(MRI.getType(RHS).isScalar() &&
5788 "Non-build_vector operation should have been a scalar");
     // NPQFactor is not assigned on this path; the non-vector NPQ fix-up below
     // uses a shift by one instead of a G_UMULH by NPQFactor.
5789 PreShift = PreShifts[0];
5790 MagicFactor = MagicFactors[0];
5791 PostShift = PostShifts[0];
5792 }
5793
5794 Register Q = LHS;
5795 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5796
5797 // Multiply the numerator (operand 0) by the magic value.
5798 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5799
5800 if (UseNPQ) {
5801 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5802
5803 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5804 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5805 if (Ty.isVector())
5806 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5807 else
5808 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5809
5810 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5811 }
5812
5813 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
     // Elements whose divisor is 1 were given a zero magic value above, so the
     // quotient for them is taken directly from LHS via this compare + select.
     // NOTE(review): the predicate argument of buildICmp is on a line missing
     // from this listing.
5814 auto One = MIB.buildConstant(Ty, 1);
5815 auto IsOne = MIB.buildICmp(
5817 Ty.isScalar() ? LLT::integer(1) : Ty.changeElementType(LLT::integer(1)),
5818 RHS, One);
5819 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5820
     // Remainder: LHS - (LHS / RHS) * RHS.
5821 if (Opcode == TargetOpcode::G_UREM) {
5822 auto Prod = MIB.buildMul(Ty, ret, RHS);
5823 return MIB.buildSub(Ty, LHS, Prod);
5824 }
5825 return ret;
5826}
5827
// Match step for rewriting a G_UDIV/G_UREM by a constant divisor.
// Returns false for 1-bit element types, when the target reports integer
// division as cheap for the type, and for minsize functions. Otherwise the
// divisor (operand 2) must be a constant or constant vector for which the
// unary predicate below holds (every element non-null and non-zero), and,
// when LegalizerInfo (LI) is available, the operations checked below must be
// legal or the combine must run before the legalizer.
// NOTE(review): the declaration line of this function is not present in this
// listing.
5829 unsigned Opcode = MI.getOpcode();
5830 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5831 Register Dst = MI.getOperand(0).getReg();
5832 Register RHS = MI.getOperand(2).getReg();
5833 LLT DstTy = MRI.getType(Dst);
5834
5835 auto &MF = *MI.getMF();
5836 AttributeList Attr = MF.getFunction().getAttributes();
5837 const auto &TLI = getTargetLowering();
5838 LLVMContext &Ctx = MF.getFunction().getContext();
5839 if (DstTy.getScalarSizeInBits() == 1 ||
5840 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5841 return false;
5842
5843 // Don't do this for minsize because the instruction sequence is usually
5844 // larger.
5845 if (MF.getFunction().hasMinSize())
5846 return false;
5847
     // NOTE(review): the second half of this condition is on a line missing
     // from this listing; presumably it tests the IsExact flag - confirm.
     // This early path skips the legality checks further down.
5848 if (Opcode == TargetOpcode::G_UDIV &&
5850 return matchUnaryPredicate(
5851 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5852 }
5853
5854 auto *RHSDef = MRI.getVRegDef(RHS);
5855 if (!isConstantOrConstantVector(*RHSDef, MRI))
5856 return false;
5857
5858 // Don't do this if the types are not going to be legal.
5859 if (LI) {
5860 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5861 return false;
5862 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5863 return false;
     // NOTE(review): the opening of the G_ICMP legality check is on a line
     // missing from this listing.
5865 {TargetOpcode::G_ICMP,
5866 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5867 DstTy}}))
5868 return false;
     // The remainder form additionally needs a G_SUB.
5869 if (Opcode == TargetOpcode::G_UREM &&
5870 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5871 return false;
5872 }
5873
     // Reject null constants and any zero divisor element.
5874 return matchUnaryPredicate(
5875 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5876}
5877
// Apply step: emits the replacement sequence with buildUDivOrURemUsingMul and
// passes MI together with the def register (operand 0) of the returned
// instruction to replaceSingleDefInstWithReg.
// NOTE(review): the declaration line of this function is not present in this
// listing.
5879 auto *NewMI = buildUDivOrURemUsingMul(MI);
5880 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5881}
5882
// Match step for rewriting a G_SDIV/G_SREM by a constant divisor.
// Returns false for element types narrower than 3 bits, when the target
// reports integer division as cheap for the type, and for minsize functions.
// Otherwise the divisor (operand 2) must be a constant or constant vector for
// which the unary predicate below holds (every element non-null and
// non-zero), and, when LegalizerInfo (LI) is available, the operations
// checked below must be acceptable.
// NOTE(review): the declaration line of this function is not present in this
// listing.
5884 unsigned Opcode = MI.getOpcode();
5885 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5886 Register Dst = MI.getOperand(0).getReg();
5887 Register RHS = MI.getOperand(2).getReg();
5888 LLT DstTy = MRI.getType(Dst);
5889 auto SizeInBits = DstTy.getScalarSizeInBits();
     // Same shape as DstTy with elements twice as wide; used for the G_MUL
     // fallback check when G_SMULH is not legal.
5890 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5891
5892 auto &MF = *MI.getMF();
5893 AttributeList Attr = MF.getFunction().getAttributes();
5894 const auto &TLI = getTargetLowering();
5895 LLVMContext &Ctx = MF.getFunction().getContext();
5896 if (DstTy.getScalarSizeInBits() < 3 ||
5897 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5898 return false;
5899
5900 // Don't do this for minsize because the instruction sequence is usually
5901 // larger.
5902 if (MF.getFunction().hasMinSize())
5903 return false;
5904
5905 // If the sdiv has an 'exact' flag we can use a simpler lowering.
     // NOTE(review): the second half of this condition is on a line missing
     // from this listing. This early path skips the legality checks below.
5906 if (Opcode == TargetOpcode::G_SDIV &&
5908 return matchUnaryPredicate(
5909 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5910 }
5911
5912 auto *RHSDef = MRI.getVRegDef(RHS);
5913 if (!isConstantOrConstantVector(*RHSDef, MRI))
5914 return false;
5915
5916 // Don't do this if the types are not going to be legal.
5917 if (LI) {
5918 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5919 return false;
     // Either G_SMULH on DstTy is legal, or a G_MUL on the double-width type
     // passes isLegalOrHasWidenScalar.
5920 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5921 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5922 return false;
     // The remainder form additionally needs a G_SUB.
5923 if (Opcode == TargetOpcode::G_SREM &&
5924 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5925 return false;
5926 }
5927
     // Reject null constants and any zero divisor element.
5928 return matchUnaryPredicate(
5929 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5930}
5931
// Apply step: emits the replacement sequence with buildSDivOrSRemUsingMul and
// passes MI together with the def register (operand 0) of the returned
// instruction to replaceSingleDefInstWithReg.
// NOTE(review): the declaration line of this function is not present in this
// listing.
5933 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5934 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5935}
5936
// Builds a multiply/shift replacement for a G_SDIV or G_SREM and returns the
// last instruction of the emitted sequence. LHS is operand 1 and RHS (the
// divisor) is operand 2; every divisor element is cast to ConstantInt below.
//
// When MI carries the IsExact flag the result is
//   (LHS >>exact(arithmetic) tz) * inverse(divisor >> tz),
// with tz the number of trailing zero bits of the divisor.
// Otherwise the sequence is: G_SMULH by a magic value, add of LHS * Factor
// (Factor is 0, +1 or -1 per element), arithmetic shift, and add of the
// masked sign bit. For G_SREM the remainder is then LHS - Q * RHS.
//
// NOTE(review): the declaration line of this function and a few statements
// are not present in this listing (see the notes below).
5938 unsigned Opcode = MI.getOpcode();
5939 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5940 Opcode == TargetOpcode::G_SREM);
5941 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5942 Register Dst = SDivorRem.getReg(0);
5943 Register LHS = SDivorRem.getReg(1);
5944 Register RHS = SDivorRem.getReg(2);
5945 LLT Ty = MRI.getType(Dst);
5946 LLT ScalarTy = Ty.getScalarType();
5947 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
     // NOTE(review): ShiftAmtTy is defined on a line missing from this listing.
5949 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5950 auto &MIB = Builder;
5951
5952 bool UseSRA = false;
5953 SmallVector<Register, 16> ExactShifts, ExactFactors;
5954
5955 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5956 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5957
     // Per-element callback for the exact path: records the number of trailing
     // zero bits of the divisor as a shift amount and the multiplicative
     // inverse of the divisor with those bits (arithmetically) shifted out.
5958 auto BuildExactSDIVPattern = [&](const Constant *C) {
5959 // Don't recompute inverses for each splat element.
5960 if (IsSplat && !ExactFactors.empty()) {
5961 ExactShifts.push_back(ExactShifts[0]);
5962 ExactFactors.push_back(ExactFactors[0]);
5963 return true;
5964 }
5965
5966 auto *CI = cast<ConstantInt>(C);
5967 APInt Divisor = CI->getValue();
5968 unsigned Shift = Divisor.countr_zero();
5969 if (Shift) {
5970 Divisor.ashrInPlace(Shift);
     // At least one element needs a shift, so the G_ASHR must be emitted.
5971 UseSRA = true;
5972 }
5973
5974 // Calculate the multiplicative inverse modulo BW.
5975 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5976 APInt Factor = Divisor.multiplicativeInverse();
5977 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5978 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5979 return true;
5980 };
5981
5982 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5983 // Collect all magic values from the build vector.
5984 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5985 (void)Matched;
5986 assert(Matched && "Expected unary predicate match to succeed");
5987
5988 Register Shift, Factor;
5989 if (Ty.isVector()) {
5990 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5991 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5992 } else {
5993 Shift = ExactShifts[0];
5994 Factor = ExactFactors[0];
5995 }
5996
5997 Register Res = LHS;
5998
5999 if (UseSRA)
6000 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
6001
6002 return MIB.buildMul(Ty, Res, Factor);
6003 }
6004
6005 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6006
     // Per-element callback for the general path: records the magic
     // multiplier, the numerator factor (0, +1 or -1), the shift amount and the
     // sign-bit mask (all ones, or 0 for divisors of +1/-1) for one divisor.
6007 auto BuildSDIVPattern = [&](const Constant *C) {
6008 auto *CI = cast<ConstantInt>(C);
6009 const APInt &Divisor = CI->getValue();
6010
     // NOTE(review): the declaration of `Magics` is on lines missing from this
     // listing.
6013 int NumeratorFactor = 0;
6014 int ShiftMask = -1;
6015
6016 if (Divisor.isOne() || Divisor.isAllOnes()) {
6017 // If d is +1/-1, we just multiply the numerator by +1/-1.
6018 NumeratorFactor = Divisor.getSExtValue();
6019 Magics.Magic = 0;
6020 Magics.ShiftAmount = 0;
6021 ShiftMask = 0;
6022 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
6023 // If d > 0 and m < 0, add the numerator.
6024 NumeratorFactor = 1;
6025 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
6026 // If d < 0 and m > 0, subtract the numerator.
6027 NumeratorFactor = -1;
6028 }
6029
6030 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
6031 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
6032 Shifts.push_back(
6033 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
6034 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
6035
6036 return true;
6037 };
6038
6039 // Collect the shifts/magic values from each element.
6040 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
6041 (void)Matched;
6042 assert(Matched && "Expected unary predicate match to succeed");
6043
6044 Register MagicFactor, Factor, Shift, ShiftMask;
6045 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
6046 if (RHSDef) {
6047 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
6048 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
6049 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
6050 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
6051 } else {
6052 assert(MRI.getType(RHS).isScalar() &&
6053 "Non-build_vector operation should have been a scalar");
6054 MagicFactor = MagicFactors[0];
6055 Factor = Factors[0];
6056 Shift = Shifts[0];
6057 ShiftMask = ShiftMasks[0];
6058 }
6059
     // High half of LHS * magic.
6060 Register Q = LHS;
6061 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
6062
6063 // (Optionally) Add/subtract the numerator using Factor.
6064 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
6065 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
6066
6067 // Shift right algebraic by shift value.
6068 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
6069
6070 // Extract the sign bit, mask it and add it to the quotient.
6071 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
6072 auto T = MIB.buildLShr(Ty, Q, SignShift);
6073 T = MIB.buildAnd(Ty, T, ShiftMask);
6074 auto ret = MIB.buildAdd(Ty, Q, T);
6075
     // Remainder: LHS - (LHS / RHS) * RHS.
6076 if (Opcode == TargetOpcode::G_SREM) {
6077 auto Prod = MIB.buildMul(Ty, ret, RHS);
6078 return MIB.buildSub(Ty, LHS, Prod);
6079 }
6080 return ret;
6081}
6082
// Match step for a G_SDIV/G_UDIV whose divisor (operand 2) consists only of
// ConstantInt elements that are powers of two; when IsSigned is set, negated
// powers of two are accepted as well. Undef elements are not allowed.
// NOTE(review): the declaration line (which presumably declares IsSigned) is
// not present in this listing.
6084 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
6085 MI.getOpcode() == TargetOpcode::G_UDIV) &&
6086 "Expected SDIV or UDIV");
6087 auto &Div = cast<GenericMachineInstr>(MI);
6088 Register RHS = Div.getReg(2);
6089 auto MatchPow2 = [&](const Constant *C) {
     // dyn_cast yields null for anything that is not a ConstantInt, which
     // makes the predicate fail.
6090 auto *CI = dyn_cast<ConstantInt>(C);
6091 return CI && (CI->getValue().isPowerOf2() ||
6092 (IsSigned && CI->getValue().isNegatedPowerOf2()));
6093 };
6094 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
6095}
6096
// Apply step: replaces a G_SDIV by a (possibly negated) power of two with the
// shift/add/select sequence sketched in the comment below, writing the result
// to operand 0 of MI and erasing MI.
// NOTE(review): the declaration line of this function and the definition of
// ShiftAmtTy are not present in this listing.
6098 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
6099 auto &SDiv = cast<GenericMachineInstr>(MI);
6100 Register Dst = SDiv.getReg(0);
6101 Register LHS = SDiv.getReg(1);
6102 Register RHS = SDiv.getReg(2);
6103 LLT Ty = MRI.getType(Dst);
     // Type of the compare results: a 1-bit integer, or a vector of them with
     // the same element count as Ty.
6105 LLT CCVT = Ty.isVector() ? LLT::vector(Ty.getElementCount(), LLT::integer(1))
6106 : LLT::integer(1);
6107
6108 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
6109 // to the following version:
6110 //
6111 // %c1 = G_CTTZ %rhs
6112 // %inexact = G_SUB $bitwidth, %c1
6113 // %sign = G_ASHR %lhs, $(bitwidth - 1)
6114 // %lshr = G_LSHR %sign, %inexact
6115 // %add = G_ADD %lhs, %lshr
6116 // %ashr = G_ASHR %add, %c1
6117 // %ashr = G_SELECT %isoneorminusone, %lhs, %ashr
6118 // %zero = G_CONSTANT $0
6119 // %neg = G_NEG %ashr
6120 // %isneg = G_ICMP SLT %rhs, %zero
6121 // %res = G_SELECT %isneg, %neg, %ashr
6122
6123 unsigned BitWidth = Ty.getScalarSizeInBits();
6124 auto Zero = Builder.buildConstant(Ty, 0);
6125
6126 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
6127 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6128 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
6129 // Splat the sign bit into the register
6130 auto Sign = Builder.buildAShr(
6131 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
6132
6133 // Add (LHS < 0) ? abs2 - 1 : 0;
6134 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
6135 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
6136 auto AShr = Builder.buildAShr(Ty, Add, C1);
6137
6138 // Special case: (sdiv X, 1) -> X
6139 // Special Case: (sdiv X, -1) -> 0-X
6140 auto One = Builder.buildConstant(Ty, 1);
6141 auto MinusOne = Builder.buildConstant(Ty, -1);
6142 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
6143 auto IsMinusOne =
6144 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
6145 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
     // For +1/-1 the select yields LHS itself; the -1 case is then negated by
     // the final select below.
6146 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
6147
6148 // If divided by a positive value, we're done. Otherwise, the result must be
6149 // negated.
6150 auto Neg = Builder.buildNeg(Ty, AShr);
6151 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
6152 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
6153 MI.eraseFromParent();
6154}
6155
// Apply step: replaces a G_UDIV by a power of two with
//   G_LSHR %lhs, (G_CTTZ %rhs)
// written to operand 0 of MI, then erases MI.
// NOTE(review): the declaration line of this function and the definition of
// ShiftAmtTy are not present in this listing.
6157 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
6158 auto &UDiv = cast<GenericMachineInstr>(MI);
6159 Register Dst = UDiv.getReg(0);
6160 Register LHS = UDiv.getReg(1);
6161 Register RHS = UDiv.getReg(2);
6162 LLT Ty = MRI.getType(Dst);
6164
     // The trailing-zero count of the divisor is used as the shift amount.
6165 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6166 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
6167 MI.eraseFromParent();
6168}
6169
// Apply step: replaces a G_SREM with the bias-and-mask sequence described in
// the comment below, writing the result to Dst and erasing MI.
// NOTE(review): the declaration line of this function and the definition of
// ShiftAmtTy are not present in this listing.
6171 assert(MI.getOpcode() == TargetOpcode::G_SREM && "Expected SREM");
6172 auto &SRem = cast<GBinOp>(MI);
6173 Register Dst = SRem.getReg(0);
6174 Register LHS = SRem.getLHSReg();
6175 Register RHS = SRem.getRHSReg();
6176 LLT Ty = MRI.getType(Dst);
6178
6179 // Effectively we want to lower G_SREM %lhs, %rhs, where %rhs is +/- a power
6180 // of 2, to the following branch-free bias-and-mask version:
6181 //
6182 // %abs = G_ABS %rhs
6183 // %mask = G_SUB %abs, 1
6184 // %sign = G_ASHR %lhs, $(bitwidth - 1)
6185 // %bias = G_AND %sign, %mask
6186 // %biased = G_ADD %lhs, %bias
6187 // %masked = G_AND %biased, %mask
6188 // %res = G_SUB %masked, %bias
6189 //
6190 // The bias adds (|%rhs| - 1) for negative %lhs, correcting rounding towards
6191 // zero (instead of towards -inf that a plain mask would give). Constant
6192 // divisors collapse %mask to a single G_CONSTANT via the CSEMIRBuilder folds
6193 // for G_ABS and G_SUB.
6194
6195 unsigned BitWidth = Ty.getScalarSizeInBits();
6196 auto AbsRHS = Builder.buildAbs(Ty, RHS);
6197 auto Mask = Builder.buildSub(Ty, AbsRHS, Builder.buildConstant(Ty, 1));
6198 auto BWMinusOne = Builder.buildConstant(ShiftAmtTy, BitWidth - 1);
     // Arithmetic shift by (bitwidth - 1) splats the sign bit of LHS.
6199 auto Sign = Builder.buildAShr(Ty, LHS, BWMinusOne);
6200 auto Bias = Builder.buildAnd(Ty, Sign, Mask);
6201 auto Biased = Builder.buildAdd(Ty, LHS, Bias);
6202 auto Masked = Builder.buildAnd(Ty, Biased, Mask);
6203 Builder.buildSub(Dst, Masked, Bias);
6204 MI.eraseFromParent();
6205}
6206
// Match step for turning a G_UMULH by a power of two into a logical shift.
// Operand 2 must consist only of ConstantInt elements that are powers of two
// other than 1 (undefs not allowed), and both G_LSHR and G_CTLZ must be legal
// or the combine must run before the legalizer.
// NOTE(review): the declaration line of this function and the definition of
// ShiftAmtTy are not present in this listing.
6208 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
6209 Register RHS = MI.getOperand(2).getReg();
6210 Register Dst = MI.getOperand(0).getReg();
6211 LLT Ty = MRI.getType(Dst);
6212 LLT RHSTy = MRI.getType(RHS);
6214 auto MatchPow2ExceptOne = [&](const Constant *C) {
6215 if (auto *CI = dyn_cast<ConstantInt>(C))
6216 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
6217 return false;
6218 };
6219 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
6220 return false;
6221 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
6222 // get log base 2, and it is not always legal on a target.
6223 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
6224 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
6225}
6226
// Apply step: replaces MI with
//   G_LSHR %lhs, (bitwidth - log2(%rhs))
// written to operand 0 of MI, then erases MI. The shift amount is computed in
// Ty and converted to ShiftAmtTy with a zext-or-trunc.
// NOTE(review): the declaration line of this function and the definition of
// ShiftAmtTy are not present in this listing.
6228 Register LHS = MI.getOperand(1).getReg();
6229 Register RHS = MI.getOperand(2).getReg();
6230 Register Dst = MI.getOperand(0).getReg();
6231 LLT Ty = MRI.getType(Dst);
6233 unsigned NumEltBits = Ty.getScalarSizeInBits();
6234
6235 auto LogBase2 = buildLogBase2(RHS, Builder);
6236 auto ShiftAmt =
6237 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
6238 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
6239 Builder.buildLShr(Dst, LHS, Trunc);
6240 MI.eraseFromParent();
6241}
6242
6244 Register &MatchInfo) const {
     // Match step: the source of MI (operand 1) must be a signed clamp of some
     // register to the signed range of the destination element width, written
     // either as smin(smax(x, SignedMin), SignedMax) or as
     // smax(smin(x, SignedMax), SignedMin). On success MatchInfo holds x.
     // NOTE(review): the first line of the declaration and the start of the
     // G_TRUNC_SSAT_S legality check are not present in this listing.
6245 Register Dst = MI.getOperand(0).getReg();
6246 Register Src = MI.getOperand(1).getReg();
6247 LLT DstTy = MRI.getType(Dst);
6248 LLT SrcTy = MRI.getType(Src);
6249 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6250 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6251 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6252
6254 {TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6255 return false;
6256
     // Destination-width signed bounds, sign-extended to the source width so
     // they can be compared against the clamp constants.
6257 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6258 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
6259 return mi_match(Src, MRI,
6260 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6261 m_SpecificICstOrSplat(SignedMin)),
6262 m_SpecificICstOrSplat(SignedMax))) ||
6263 mi_match(Src, MRI,
6264 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6265 m_SpecificICstOrSplat(SignedMax)),
6266 m_SpecificICstOrSplat(SignedMin)));
6267}
6268
6270 Register &MatchInfo) const {
     // Apply step: emits a signed-saturating truncate (buildTruncSSatS) of the
     // matched register into operand 0 of MI, then erases MI.
     // NOTE(review): the first line of the declaration is not present in this
     // listing.
6271 Register Dst = MI.getOperand(0).getReg();
6272 Builder.buildTruncSSatS(Dst, MatchInfo);
6273 MI.eraseFromParent();
6274}
6275
6277 Register &MatchInfo) const {
     // Match step: the source of MI (operand 1) must clamp some register to
     // the unsigned range [0, UnsignedMax] of the destination element width.
     // Three pattern alternatives are tried; on success MatchInfo holds the
     // clamped register.
     // NOTE(review): the first line of the declaration, the start of the
     // G_TRUNC_SSAT_U legality check and the outer matcher of the first and
     // third alternatives are not present in this listing; only the second
     // alternative, smax(smin(x, UnsignedMax), 0), is fully visible.
6278 Register Dst = MI.getOperand(0).getReg();
6279 Register Src = MI.getOperand(1).getReg();
6280 LLT DstTy = MRI.getType(Dst);
6281 LLT SrcTy = MRI.getType(Src);
6282 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6283 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6284 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6285
6287 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6288 return false;
     // Destination-width unsigned maximum, zero-extended to the source width.
6289 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6290 return mi_match(Src, MRI,
6292 m_SpecificICstOrSplat(UnsignedMax))) ||
6293 mi_match(Src, MRI,
6294 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6295 m_SpecificICstOrSplat(UnsignedMax)),
6296 m_SpecificICstOrSplat(0))) ||
6297 mi_match(Src, MRI,
6299 m_SpecificICstOrSplat(UnsignedMax)));
6300}
6301
6303 Register &MatchInfo) const {
     // Apply step: emits buildTruncSSatU of the matched register into operand 0
     // of MI, then erases MI.
     // NOTE(review): the first line of the declaration is not present in this
     // listing.
6304 Register Dst = MI.getOperand(0).getReg();
6305 Builder.buildTruncSSatU(Dst, MatchInfo);
6306 MI.eraseFromParent();
6307}
6308
6310 MachineInstr &MinMI) const {
     // Match step: operand 2 of MinMI must be a constant (or splat) equal to
     // the unsigned maximum of MI's destination element width, and operand 1
     // of MinMI must not be defined by a G_SMAX.
     // NOTE(review): the first line of the declaration and the start of the
     // legality check below are not present in this listing.
6311 Register Min = MinMI.getOperand(2).getReg();
6312 Register Val = MinMI.getOperand(1).getReg();
6313 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6314 LLT SrcTy = MRI.getType(Val);
6315 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6316 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6317 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6318
6320 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6321 return false;
     // Destination-width unsigned maximum, zero-extended to the source width.
6322 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6323 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6324 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6325}
6326
6328 MachineInstr &SrcMI) const {
     // Match step: succeeds only when LegalizerInfo (LI) is available and
     // G_FPTOUI_SAT from the type of SrcMI's operand 1 to the type of MI's
     // operand 0 is legal or the combine runs before the legalizer.
     // NOTE(review): the first line of the declaration is not present in this
     // listing.
6329 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6330 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6331
6332 return LI &&
6333 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6334}
6335
6337 BuildFnTy &MatchInfo) const {
     // Match step that removes redundant G_FNEG operands from a floating-point
     // G_FADD/G_FSUB/G_FMUL/G_FDIV/G_FMAD/G_FMA. On success MatchInfo is set
     // to a callback that rewrites MI in place: it switches the opcode where
     // needed and replaces operands 1 and 2 with the un-negated registers.
     // NOTE(review): the first line of the declaration is not present in this
     // listing.
6338 unsigned Opc = MI.getOpcode();
6339 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6340 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6341 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6342
6343 Register Dst = MI.getOperand(0).getReg();
6344 Register X = MI.getOperand(1).getReg();
6345 Register Y = MI.getOperand(2).getReg();
6346 LLT Type = MRI.getType(Dst);
6347
6348 // fold (fadd x, fneg(y)) -> (fsub x, y)
6349 // fold (fadd fneg(y), x) -> (fsub x, y)
6350 // G_FADD is commutative so both cases are checked by m_GFAdd
6351 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6352 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6353 Opc = TargetOpcode::G_FSUB;
6354 }
6355 // fold (fsub x, fneg(y)) -> (fadd x, y)
6356 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6357 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6358 Opc = TargetOpcode::G_FADD;
6359 }
6360 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6361 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6362 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6363 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
6364 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6365 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6366 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6367 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6368 // no opcode change
6369 } else
6370 return false;
6371
     // Opc, X and Y are captured by value; MI is captured by reference and is
     // modified in place under Observer notifications.
6372 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6373 Observer.changingInstr(MI);
6374 MI.setDesc(B.getTII().get(Opc));
6375 MI.getOperand(1).setReg(X);
6376 MI.getOperand(2).setReg(Y);
6377 Observer.changedInstr(MI);
6378 };
6379 return true;
6380}
6381
6383 Register &MatchInfo) const {
     // Match step for a G_FSUB whose LHS (operand 1) is a floating-point zero
     // constant (or splat of one, for vectors). MatchInfo is set to the RHS
     // register (operand 2). Returns true for -0.0 unconditionally and for
     // +0.0 only when MI has the nsz flag.
     // NOTE(review): the first line of the declaration and the scalar arm of
     // the conditional that initialises LHSCst are not present in this
     // listing.
6384 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6385
6386 Register LHS = MI.getOperand(1).getReg();
6387 MatchInfo = MI.getOperand(2).getReg();
6388 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6389
6390 const auto LHSCst = Ty.isVector()
6391 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6393 if (!LHSCst)
6394 return false;
6395
6396 // -0.0 is always allowed
6397 if (LHSCst->Value.isNegZero())
6398 return true;
6399
6400 // +0.0 is only allowed if nsz is set.
6401 if (LHSCst->Value.isPosZero())
6402 return MI.getFlag(MachineInstr::FmNsz);
6403
6404 return false;
6405}
6406
6408 Register &MatchInfo) const {
     // Apply step: writes G_FNEG(G_FCANONICALIZE(MatchInfo)) to operand 0 of
     // MI and erases MI.
     // NOTE(review): the first line of the declaration is not present in this
     // listing.
6409 Register Dst = MI.getOperand(0).getReg();
6410 Builder.buildFNeg(
6411 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6412 eraseInst(MI);
6413}
6414
6415/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6416/// due to global flags or MachineInstr flags.
6417static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6418 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6419 return false;
6420 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6421}
6422
6423static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6424 const MachineRegisterInfo &MRI) {
6425 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6426 MRI.use_instr_nodbg_end()) >
6427 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6428 MRI.use_instr_nodbg_end());
6429}
6430
6432 bool &AllowFusionGlobally,
6433 bool &HasFMAD, bool &Aggressive,
6434 bool CanReassociate) const {
     // Common precondition check for the FMA/FMAD fusion combines. Returns
     // false when fusion must not happen. The three reference parameters are
     // outputs:
     //   HasFMAD             - not pre-legalize and the target reports FMAD as
     //                         legal for MI and its type.
     //   AllowFusionGlobally - FP-op fusion is set to Fast, or HasFMAD.
     //   Aggressive          - the target enables aggressive FMA fusion for
     //                         the type.
     // They are assigned in that order, so an early `return false` leaves the
     // later ones untouched. When CanReassociate is true, MI must also carry
     // the FmReassoc flag.
     // NOTE(review): the first line of the declaration is not present in this
     // listing.
6435
6436 auto *MF = MI.getMF();
6437 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6438 const TargetOptions &Options = MF->getTarget().Options;
6439 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6440
6441 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6442 return false;
6443
6444 // Floating-point multiply-add with intermediate rounding.
6445 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6446 // Floating-point multiply-add without intermediate rounding.
6447 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6448 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6449 // No valid opcode, do not combine.
6450 if (!HasFMAD && !HasFMA)
6451 return false;
6452
6453 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6454 // If the addition is not contractable, do not combine.
6455 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6456 return false;
6457
6458 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6459 return true;
6460}
6461
6464 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // Match step: folds a G_FADD with a contractable G_FMUL operand into a
     // fused multiply-add (G_FMAD when HasFMAD, otherwise G_FMA). On success
     // MatchInfo builds the fused instruction into MI's destination register.
     // A multiply with more than one non-debug use is only folded in
     // aggressive mode.
     // NOTE(review): the first lines of the declaration are not present in
     // this listing.
6465 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6466
6467 bool AllowFusionGlobally, HasFMAD, Aggressive;
6468 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6469 return false;
6470
6471 Register Op1 = MI.getOperand(1).getReg();
6472 Register Op2 = MI.getOperand(2).getReg();
6473 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6474 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6475 unsigned PreferredFusedOpcode =
6476 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6477
6478 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6479 // prefer to fold the multiply with fewer uses.
6480 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6481 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6482 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6483 std::swap(LHS, RHS);
6484 }
6485
6486 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6487 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6488 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
     // LHS, RHS and the opcode are captured by value; MI by reference.
6489 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6490 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6491 {LHS.MI->getOperand(1).getReg(),
6492 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6493 };
6494 return true;
6495 }
6496
6497 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6498 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6499 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6500 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6501 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6502 {RHS.MI->getOperand(1).getReg(),
6503 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6504 };
6505 return true;
6506 }
6507
6508 return false;
6509}
6510
6513 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // Match step: folds a G_FADD one of whose operands is a G_FPEXT of a
     // contractable G_FMUL into a fused multiply-add of the extended multiply
     // operands (G_FMAD when HasFMAD, otherwise G_FMA). The target must report
     // the extension as foldable via isFPExtFoldable. On success MatchInfo
     // builds the two G_FPEXTs and the fused instruction into MI's destination.
     // NOTE(review): the first lines of the declaration are not present in
     // this listing.
6514 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6515
6516 bool AllowFusionGlobally, HasFMAD, Aggressive;
6517 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6518 return false;
6519
6520 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6521 Register Op1 = MI.getOperand(1).getReg();
6522 Register Op2 = MI.getOperand(2).getReg();
6523 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6524 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6525 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6526
6527 unsigned PreferredFusedOpcode =
6528 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6529
6530 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6531 // prefer to fold the multiply with fewer uses.
6532 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6533 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6534 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6535 std::swap(LHS, RHS);
6536 }
6537
6538 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
     // FpExtSrc is bound by m_MInstr to the instruction feeding the G_FPEXT.
6539 MachineInstr *FpExtSrc;
6540 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6541 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6542 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6543 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6544 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6545 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6546 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6547 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6548 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6549 };
6550 return true;
6551 }
6552
6553 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6554 // Note: Commutes FADD operands.
6555 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6556 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6557 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6558 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6559 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6560 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6561 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6562 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6563 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6564 };
6565 return true;
6566 }
6567
6568 return false;
6569}
6570
6573 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
     //
     // Matcher for a G_FADD one of whose operands is a fused multiply-add
     // (G_FMAD or G_FMA) whose addend operand is defined by a G_FMUL:
     //   (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
     //   (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
     // On a match, MatchInfo receives a callback that builds the two nested
     // fused ops and true is returned; otherwise false is returned.
6574 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6575
     // The three flags are passed uninitialised and read afterwards, i.e. they
     // are filled in by canCombineFMadOrFMA.
6576 bool AllowFusionGlobally, HasFMAD, Aggressive;
6577 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6578 return false;
6579
     // Pair each source register with the instruction that defines it.
6580 Register Op1 = MI.getOperand(1).getReg();
6581 Register Op2 = MI.getOperand(2).getReg();
6582 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6583 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6584 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6585
     // G_FMAD is chosen whenever HasFMAD is set, G_FMA otherwise.
6586 unsigned PreferredFusedOpcode =
6587 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6588
6589 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6590 // prefer to fold the multiply with fewer uses.
6591 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6592 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6593 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6594 std::swap(LHS, RHS);
6595 }
6596
     // FMA stays null unless one of the two patterns below matches; Z is the
     // fadd operand that is not the matched fused op.
6597 MachineInstr *FMA = nullptr;
6598 Register Z;
6599 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
     // Both the fused op's result and its addend must have exactly one
     // non-debug use.
6600 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6601 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6602 TargetOpcode::G_FMUL) &&
6603 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6604 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6605 FMA = LHS.MI;
6606 Z = RHS.Reg;
6607 }
6608 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6609 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6610 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6611 TargetOpcode::G_FMUL) &&
6612 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6613 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6614 Z = LHS.Reg;
6615 FMA = RHS.MI;
6616 }
6617
6618 if (FMA) {
     // Extract x, y from the fused op and u, v from the G_FMUL feeding its
     // third source operand.
6619 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6620 Register X = FMA->getOperand(1).getReg();
6621 Register Y = FMA->getOperand(2).getReg();
6622 Register U = FMulMI->getOperand(1).getReg();
6623 Register V = FMulMI->getOperand(2).getReg();
6624
     // Registers are captured by value, MI by reference. The callback builds
     // the inner fused op (u, v, z) into a fresh vreg of DstTy, then the
     // outer fused op (x, y, inner) into MI's destination register.
6625 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6626 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6627 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6628 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6629 {X, Y, InnerFMA});
6630 };
6631 return true;
6632 }
6633
6634 return false;
6635}
6636
6639 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
     //
     // Matcher for a G_FADD combining a fused multiply-add, a G_FPEXT and a
     // G_FMUL in one of the four shapes spelled out in the "fold" comments
     // below. It bails out unless canCombineFMadOrFMA succeeds and reports
     // Aggressive. On a match, MatchInfo receives the builder callback and
     // true is returned.
6640 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6641
6642 bool AllowFusionGlobally, HasFMAD, Aggressive;
6643 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6644 return false;
6645
     // Every fold in this function is gated on Aggressive.
6646 if (!Aggressive)
6647 return false;
6648
6649 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6650 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6651 Register Op1 = MI.getOperand(1).getReg();
6652 Register Op2 = MI.getOperand(2).getReg();
6653 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6654 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6655
6656 unsigned PreferredFusedOpcode =
6657 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6658
6659 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6660 // prefer to fold the multiply with fewer uses.
     // (Aggressive is always true here because of the early return above.)
6661 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6662 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6663 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6664 std::swap(LHS, RHS);
6665 }
6666
6667 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
     // NOTE(review): the listing skips from line 6668 to 6670, so the end of
     // this lambda's parameter list is missing here. `Y` and `B` are used in
     // the body and are presumably declared on the missing line -- confirm.
6668 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6670 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6671 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6672 Register InnerFMA =
6673 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6674 .getReg(0);
6675 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6676 {X, Y, InnerFMA});
6677 };
6678
6679 MachineInstr *FMulMI, *FMAMI;
6680 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6681 // -> (fma x, y, (fma (fpext u), (fpext v), z))
     // The target is asked whether the extension from the fmul's result type
     // to DstType can be folded into the fused opcode.
6682 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6683 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6684 m_GFPExt(m_MInstr(FMulMI))) &&
6685 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6686 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6687 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6688 MatchInfo = [=](MachineIRBuilder &B) {
6689 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6690 FMulMI->getOperand(2).getReg(), RHS.Reg,
6691 LHS.MI->getOperand(1).getReg(),
6692 LHS.MI->getOperand(2).getReg(), B);
6693 };
6694 return true;
6695 }
6696
6697 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6698 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6699 // FIXME: This turns two single-precision and one double-precision
6700 // operation into two double-precision operations, which might not be
6701 // interesting for all targets, especially GPUs.
6702 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6703 FMAMI->getOpcode() == PreferredFusedOpcode) {
     // This FMulMI shadows the outer declaration; it is the definition of
     // the inner fused op's third source operand.
6704 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6705 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6706 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6707 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6708 MatchInfo = [=](MachineIRBuilder &B) {
     // x and y are extended to DstType before being handed to the shared
     // builder, which extends u and v itself.
6709 Register X = FMAMI->getOperand(1).getReg();
6710 Register Y = FMAMI->getOperand(2).getReg();
6711 X = B.buildFPExt(DstType, X).getReg(0);
6712 Y = B.buildFPExt(DstType, Y).getReg(0);
6713 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6714 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6715 };
6716
6717 return true;
6718 }
6719 }
6720
6721 // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
6722 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6723 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6724 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6725 m_GFPExt(m_MInstr(FMulMI))) &&
6726 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6727 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6728 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6729 MatchInfo = [=](MachineIRBuilder &B) {
6730 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6731 FMulMI->getOperand(2).getReg(), LHS.Reg,
6732 RHS.MI->getOperand(1).getReg(),
6733 RHS.MI->getOperand(2).getReg(), B);
6734 };
6735 return true;
6736 }
6737
6738 // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
6739 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6740 // FIXME: This turns two single-precision and one double-precision
6741 // operation into two double-precision operations, which might not be
6742 // interesting for all targets, especially GPUs.
6743 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6744 FMAMI->getOpcode() == PreferredFusedOpcode) {
6745 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6746 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6747 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6748 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6749 MatchInfo = [=](MachineIRBuilder &B) {
6750 Register X = FMAMI->getOperand(1).getReg();
6751 Register Y = FMAMI->getOperand(2).getReg();
6752 X = B.buildFPExt(DstType, X).getReg(0);
6753 Y = B.buildFPExt(DstType, Y).getReg(0);
6754 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6755 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6756 };
6757 return true;
6758 }
6759 }
6760
6761 return false;
6762}
6763
6766 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
     //
     // Matcher for a G_FSUB with a contractable G_FMUL operand:
     //   (fsub (fmul x, y), z) -> (fma x, y, -z)
     //   (fsub x, (fmul y, z)) -> (fma -y, z, x)
     // On a match, MatchInfo receives the builder callback and true is
     // returned; otherwise false is returned.
6767 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6768
6769 bool AllowFusionGlobally, HasFMAD, Aggressive;
6770 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6771 return false;
6772
6773 Register Op1 = MI.getOperand(1).getReg();
6774 Register Op2 = MI.getOperand(2).getReg();
6775 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6776 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6777 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6778
6779 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
6780 // prefer to fold the multiply with fewer uses.
     // Unlike the fadd matchers the operands are not swapped (fsub is not
     // commutative); the flag only selects which of the two folds is tried.
     // It is declared as int but only ever holds true/false.
6781 int FirstMulHasFewerUses = true;
6782 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6783 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6784 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6785 FirstMulHasFewerUses = false;
6786
6787 unsigned PreferredFusedOpcode =
6788 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6789
6790 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
     // Without Aggressive, the multiply result must have a single non-debug
     // use.
6791 if (FirstMulHasFewerUses &&
6792 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6793 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6794 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6795 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6796 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6797 {LHS.MI->getOperand(1).getReg(),
6798 LHS.MI->getOperand(2).getReg(), NegZ});
6799 };
6800 return true;
6801 }
6802 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6803 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6804 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6805 MatchInfo = [=, &MI](MachineIRBuilder &B) {
     // The negation is applied to the multiply's first source operand.
6806 Register NegY =
6807 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6808 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6809 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6810 };
6811 return true;
6812 }
6813
6814 return false;
6815}
6816
6819 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
     //
     // Matcher for a G_FSUB with an operand of the form (fneg (fmul ...)):
     //   (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
     //   (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
     // On a match, MatchInfo receives the builder callback and true is
     // returned; otherwise false is returned.
6820 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6821
6822 bool AllowFusionGlobally, HasFMAD, Aggressive;
6823 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6824 return false;
6825
6826 Register LHSReg = MI.getOperand(1).getReg();
6827 Register RHSReg = MI.getOperand(2).getReg();
6828 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6829
6830 unsigned PreferredFusedOpcode =
6831 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6832
6833 MachineInstr *FMulMI;
6834 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
     // Without Aggressive, both the fneg result and the fmul result must have
     // a single non-debug use.
6835 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6836 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6837 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6838 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6839 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6840 Register NegX =
6841 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6842 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6843 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6844 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6845 };
6846 return true;
6847 }
6848
6849 // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
     // No negation is emitted in this case: the fused op takes the multiply's
     // operands and the fsub's LHS directly.
6850 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6851 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6852 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6853 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6854 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6855 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6856 {FMulMI->getOperand(1).getReg(),
6857 FMulMI->getOperand(2).getReg(), LHSReg});
6858 };
6859 return true;
6860 }
6861
6862 return false;
6863}
6864
6867 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
     //
     // Matcher for a G_FSUB with an operand of the form (fpext (fmul ...)):
     //   (fsub (fpext (fmul x, y)), z)
     //       -> (fma (fpext x), (fpext y), (fneg z))
     //   (fsub x, (fpext (fmul y, z)))
     //       -> (fma (fneg (fpext y)), (fpext z), x)
     // On a match, MatchInfo receives the builder callback and true is
     // returned; otherwise false is returned.
     // NOTE(review): unlike the other fpext-based matchers in this file, no
     // TLI.isFPExtFoldable query is made here -- confirm that is intended.
6868 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6869
6870 bool AllowFusionGlobally, HasFMAD, Aggressive;
6871 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6872 return false;
6873
6874 Register LHSReg = MI.getOperand(1).getReg();
6875 Register RHSReg = MI.getOperand(2).getReg();
6876 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6877
6878 unsigned PreferredFusedOpcode =
6879 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6880
6881 MachineInstr *FMulMI;
6882 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
     // Without Aggressive, the fpext result must have a single non-debug use.
6883 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6884 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6885 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6886 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6887 Register FpExtX =
6888 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6889 Register FpExtY =
6890 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6891 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6892 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6893 {FpExtX, FpExtY, NegZ});
6894 };
6895 return true;
6896 }
6897
6898 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6899 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6900 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6901 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6902 MatchInfo = [=, &MI](MachineIRBuilder &B) {
     // The first multiplicand is extended first and then negated.
6903 Register FpExtY =
6904 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6905 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6906 Register FpExtZ =
6907 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6908 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6909 {NegY, FpExtZ, LHSReg});
6910 };
6911 return true;
6912 }
6913
6914 return false;
6915}
6916
6919 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
     //
     // Matcher for a G_FSUB where one operand is a G_FMUL wrapped in both a
     // G_FPEXT and a G_FNEG (in either nesting order). The four accepted
     // shapes and their replacements are listed in the "fold" comments below.
     // On a match, MatchInfo receives the builder callback and true is
     // returned; otherwise false is returned.
6920 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6921
6922 bool AllowFusionGlobally, HasFMAD, Aggressive;
6923 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6924 return false;
6925
6926 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6927 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6928 Register LHSReg = MI.getOperand(1).getReg();
6929 Register RHSReg = MI.getOperand(2).getReg();
6930
6931 unsigned PreferredFusedOpcode =
6932 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6933
     // Shared builder: extends X and Y to DstTy and emits the fused op
     // (fpext X, fpext Y, Z) into Dst.
     // NOTE(review): the listing skips from line 6934 to 6936, so the end of
     // this lambda's parameter list is missing here. `B` is used in the body
     // and is presumably declared on the missing line -- confirm.
6934 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6936 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6937 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6938 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6939 };
6940
6941 MachineInstr *FMulMI;
6942 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6943 // (fneg (fma (fpext x), (fpext y), z))
6944 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6945 // (fneg (fma (fpext x), (fpext y), z))
     // The target is asked whether the extension from the fmul's result type
     // to DstTy can be folded into the fused opcode.
6946 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6947 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6948 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6949 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6950 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6951 MatchInfo = [=, &MI](MachineIRBuilder &B) {
     // The fused op goes into a fresh vreg; the final fneg defines MI's
     // destination register.
6952 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6953 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6954 FMulMI->getOperand(2).getReg(), RHSReg, B);
6955 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6956 };
6957 return true;
6958 }
6959
6960 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6961 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6962 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6963 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6964 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6965 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6966 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6967 MatchInfo = [=, &MI](MachineIRBuilder &B) {
     // Here the fused op writes MI's destination register directly.
6968 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6969 FMulMI->getOperand(2).getReg(), LHSReg, B);
6970 };
6971 return true;
6972 }
6973
6974 return false;
6975}
6976
6978 unsigned &IdxToPropagate) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
     //
     // Matches G_FMINNUM / G_FMAXNUM / G_FMINIMUM / G_FMAXIMUM when source
     // operand 1 or 2 is a constant NaN. On success, IdxToPropagate is set to
     // the index (1 or 2) of the operand selected below and true is returned.
     // Any other opcode returns false.
6979 bool PropagateNaN;
6980 switch (MI.getOpcode()) {
6981 default:
6982 return false;
     // For the *NUM opcodes the operand that is NOT the NaN is selected.
6983 case TargetOpcode::G_FMINNUM:
6984 case TargetOpcode::G_FMAXNUM:
6985 PropagateNaN = false;
6986 break;
     // For the *IMUM opcodes the NaN operand itself is selected.
6987 case TargetOpcode::G_FMINIMUM:
6988 case TargetOpcode::G_FMAXIMUM:
6989 PropagateNaN = true;
6990 break;
6991 }
6992
     // Returns true, and records the operand index, iff operand Idx is a
     // floating-point constant whose value is NaN.
6993 auto MatchNaN = [&](unsigned Idx) {
6994 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6995 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6996 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6997 return false;
6998 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6999 return true;
7000 };
7001
     // Operand 1 is tried first; operand 2 is only examined if operand 1 is
     // not a constant NaN.
7002 return MatchNaN(1) || MatchNaN(2);
7003}
7004
7005// Combine multiple FDIVs with the same divisor into multiple FMULs by the
7006// reciprocal.
7007// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
7009 MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
     // NOTE(review): the line carrying the function name (listing line 7008) is
     // absent from this listing.
     //
     // Match step: collects into MatchInfo the G_FDIVs (MI included) that
     // divide by the same register Y, and returns true when their count
     // reaches the target's threshold. MatchInfo[0] is kept as an instruction
     // that dominates the others found so far. MatchInfo is appended to even
     // on paths that end up returning false.
7010 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
7011
7012 Register X = MI.getOperand(1).getReg();
7013 Register Y = MI.getOperand(2).getReg();
7014
     // The division itself must carry the 'arcp' fast-math flag.
7015 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
7016 return false;
7017
     // True when the register is a constant (or constant splat) equal to
     // exactly 1.0 or -1.0. The parameter X shadows the outer X.
7018 auto IsOne = [this](Register X) {
7019 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
7020 return N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0));
7021 };
7022
7023 // Skip if current node is a reciprocal/fneg-reciprocal.
7024 if (IsOne(X))
7025 return false;
7026
7027 // Exit early if the target does not want this transform or if there can't
7028 // possibly be enough uses of the divisor to make the transform worthwhile.
     // A threshold of 0 from the target disables the combine.
7029 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
7030 if (!MinUses)
7031 return false;
7032
7033 // Find all FDIV users of the same divisor. For the moment we limit all
7034 // instructions to a single BB and use the first Instr in MatchInfo as the
7035 // dominating position.
7036 MatchInfo.push_back(&MI);
7037 for (auto &U : MRI.use_nodbg_instructions(Y)) {
     // Skip MI itself and users in other basic blocks.
7038 if (&U == &MI || U.getParent() != MI.getParent())
7039 continue;
     // Y must be the divisor and not also the dividend, and the dividend
     // must not be +/-1.0.
7040 if (U.getOpcode() == TargetOpcode::G_FDIV &&
7041 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y &&
7042 !IsOne(U.getOperand(1).getReg())) {
7043 // This division is eligible for optimization only if it carries the
7044 // 'arcp' flag, i.e. if this division allows reciprocal formation.
7045 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
7046 MatchInfo.push_back(&U);
     // Keep slot 0 pointing at an instruction that dominates the rest.
7047 if (dominates(U, *MatchInfo[0]))
7048 std::swap(MatchInfo[0], MatchInfo.back());
7049 }
7050 }
7051 }
7052
7053 // Now that we have the actual number of divisor uses, make sure it meets
7054 // the minimum threshold specified by the target.
7055 return MatchInfo.size() >= MinUses;
7056}
7057
7059 SmallVector<MachineInstr *> &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // any leading parameter) are absent from this listing.
     //
     // Apply step for the divisions collected in MatchInfo: emits a single
     // reciprocal (1.0 / divisor) in front of MatchInfo[0], then replaces
     // every collected division with a multiply by that reciprocal and erases
     // the division.
7060 // Generate the new div at the position of the first instruction, that we have
7061 // ensured will dominate all other instructions.
7062 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
7063 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
     // The reciprocal takes its type and flags from MatchInfo[0].
7064 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
7065 MatchInfo[0]->getOperand(2).getReg(),
7066 MatchInfo[0]->getFlags());
7067
7068 // Replace all found div's with fmul instructions.
     // Each multiply is inserted at the division it replaces, reuses that
     // division's destination register and flags, and multiplies the original
     // dividend by the reciprocal.
7069 for (MachineInstr *MI : MatchInfo) {
7070 Builder.setInsertPt(*MI->getParent(), MI);
7071 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
7072 Div->getOperand(0).getReg(), MI->getFlags());
7073 MI->eraseFromParent();
7074 }
7075}
7076
7078 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
     // NOTE(review): the whole signature line of this function is absent from
     // this listing. `MI` and `Src` are used below without a visible
     // declaration and are presumably its parameters (`Src` being written as an
     // output) -- confirm against the full file.
     //
     // Matches a G_ADD in which one operand is a G_SUB whose subtrahend is the
     // add's other operand; the sub's minuend is bound to Src.
7079 Register LHS = MI.getOperand(1).getReg();
7080 Register RHS = MI.getOperand(2).getReg();
7081
7082 // Helper lambda to check for opportunities for
7083 // A + (B - A) -> B
7084 // (B - A) + A -> B
     // mi_match binds Src before the register comparison, so Src may be
     // written even when the helper ends up returning false.
7085 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
7086 Register Reg;
7087 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
7088 Reg == MaybeSameReg;
7089 };
     // Try the sub on the left first, then on the right.
7090 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
7091}
7092
7094 Register &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
     //
     // On success MatchInfo holds the register x from the patterns below and
     // true is returned; true additionally requires type(x) == type(dst).
7095 // This combine folds the following patterns:
7096 //
7097 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
7098 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
7099 // into
7100 // x
7101 // if
7102 // k == sizeof(VecEltTy)/2
7103 // type(x) == type(dst)
7104 //
7105 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
7106 // into
7107 // x
7108 // if
7109 // type(x) == type(dst)
     // NOTE(review): the code below compares k against
     // DstEltTy.getSizeInBits(); confirm that this agrees with the
     // "sizeof(VecEltTy)/2" wording above.
7110
7111 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
7112 LLT DstEltTy = DstVecTy.getElementType();
7113
7114 Register Lo, Hi;
7115
     // NOTE(review): the listing skips from line 7117 to 7119, so the pattern
     // argument of this mi_match (and its closing parentheses) is missing
     // here. Given the header comment it presumably matches the
     // "..., undef" form and binds Lo -- confirm against the full file.
7116 if (mi_match(
7117 MI, MRI,
7119 MatchInfo = Lo;
7120 return MRI.getType(MatchInfo) == DstVecTy;
7121 }
7122
     // Two-element forms: the low element is a bitcast of Lo, the high
     // element a logical right shift of a bitcast of Hi by a constant.
7123 std::optional<ValueAndVReg> ShiftAmount;
7124 const auto LoPattern = m_GBitcast(m_Reg(Lo));
7125 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
7126 if (mi_match(
7127 MI, MRI,
7128 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
7129 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
     // Both halves must come from the same register, and the shift must
     // equal the destination element width in bits.
7130 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
7131 MatchInfo = Lo;
7132 return MRI.getType(MatchInfo) == DstVecTy;
7133 }
7134 }
7135
7136 return false;
7137}
7138
7140 Register &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
     //
     // MatchInfo is bound to x by the pattern match; it is only meaningful to
     // the caller when true is returned.
7141 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
7142 // if type(x) == type(G_TRUNC)
     // The pattern is matched on MI's source operand (operand 1); the second
     // build-vector element is matched but not bound.
7143 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7144 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
7145 return false;
7146
7147 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
7148}
7149
7151 Register &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
7152 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
7153 // y if K == size of vector element type
7154 std::optional<ValueAndVReg> ShiftAmt;
     // NOTE(review): the listing skips from line 7155 to 7157, so the first
     // part of the pattern is missing here. Given the comment above it
     // presumably matches the shifted bitcast of a build vector and binds
     // MatchInfo to y -- confirm against the full file.
7155 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7157 m_GCst(ShiftAmt))))
7158 return false;
7159
     // Accept only when the shift amount equals the bit width of the matched
     // register's type and that type equals MI's result type.
7160 LLT MatchTy = MRI.getType(MatchInfo);
7161 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
7162 MatchTy == MRI.getType(MI.getOperand(0).getReg());
7163}
7164
7165unsigned CombinerHelper::getFPMinMaxOpcForSelect(
7166 CmpInst::Predicate Pred, LLT DstTy,
7167 SelectPatternNaNBehaviour VsNaNRetVal) const {
7168 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
7169 "Expected a NaN behaviour?");
7170 // Choose an opcode based off of legality or the behaviour when one of the
7171 // LHS/RHS may be NaN.
7172 switch (Pred) {
7173 default:
7174 return 0;
7175 case CmpInst::FCMP_UGT:
7176 case CmpInst::FCMP_UGE:
7177 case CmpInst::FCMP_OGT:
7178 case CmpInst::FCMP_OGE:
7179 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7180 return TargetOpcode::G_FMAXNUM;
7181 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7182 return TargetOpcode::G_FMAXIMUM;
7183 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
7184 return TargetOpcode::G_FMAXNUM;
7185 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
7186 return TargetOpcode::G_FMAXIMUM;
7187 return 0;
7188 case CmpInst::FCMP_ULT:
7189 case CmpInst::FCMP_ULE:
7190 case CmpInst::FCMP_OLT:
7191 case CmpInst::FCMP_OLE:
7192 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7193 return TargetOpcode::G_FMINNUM;
7194 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7195 return TargetOpcode::G_FMINIMUM;
7196 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
7197 return TargetOpcode::G_FMINNUM;
7198 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
7199 return 0;
7200 return TargetOpcode::G_FMINIMUM;
7201 }
7202}
7203
7204CombinerHelper::SelectPatternNaNBehaviour
7205CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
7206 bool IsOrderedComparison) const {
7207 bool LHSSafe = VT->isKnownNeverNaN(LHS);
7208 bool RHSSafe = VT->isKnownNeverNaN(RHS);
7209 // Completely unsafe.
7210 if (!LHSSafe && !RHSSafe)
7211 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
7212 if (LHSSafe && RHSSafe)
7213 return SelectPatternNaNBehaviour::RETURNS_ANY;
7214 // An ordered comparison will return false when given a NaN, so it
7215 // returns the RHS.
7216 if (IsOrderedComparison)
7217 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
7218 : SelectPatternNaNBehaviour::RETURNS_OTHER;
7219 // An unordered comparison will return true when given a NaN, so it
7220 // returns the LHS.
7221 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
7222 : SelectPatternNaNBehaviour::RETURNS_NAN;
7223}
7224
7225bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
7226 Register TrueVal, Register FalseVal,
7227 BuildFnTy &MatchInfo) const {
     // Tries to turn a select whose condition is a floating-point compare of
     // the two selected values into a single FP min/max instruction.
     // \p Dst is the select result, \p Cond its condition, \p TrueVal and
     // \p FalseVal its two value operands. On success MatchInfo receives a
     // callback that builds the chosen opcode into Dst, and true is returned.
7228 // Match: select (fcmp cond x, y) x, y
7229 // select (fcmp cond x, y) y, x
7230 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
7231 LLT DstTy = MRI.getType(Dst);
7232 // Bail out early on pointers, since we'll never want to fold to a min/max.
7233 if (DstTy.isPointer())
7234 return false;
7235 // Match a floating point compare with a less-than/greater-than predicate.
7236 // TODO: Allow multiple users of the compare if they are all selects.
7237 CmpInst::Predicate Pred;
7238 Register CmpLHS, CmpRHS;
     // NOTE(review): the listing skips from line 7239 to 7241 and the
     // parentheses below are unbalanced by one, so a wrapping matcher around
     // m_GFCmp is missing here (presumably a single-use restriction, given the
     // TODO above) -- confirm against the full file.
7239 if (!mi_match(Cond, MRI,
7241 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
7242 CmpInst::isEquality(Pred))
7243 return false;
     // Work out what the select yields if one compared value is NaN; give up
     // when neither side is known to be NaN-free.
7244 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
7245 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
7246 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
7247 return false;
     // If the select operands are in the opposite order to the compare
     // operands, canonicalise by swapping the compare operands and the
     // predicate; RETURNS_NAN and RETURNS_OTHER trade places accordingly.
7248 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
7249 std::swap(CmpLHS, CmpRHS);
7250 Pred = CmpInst::getSwappedPredicate(Pred);
7251 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
7252 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
7253 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
7254 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
7255 }
     // After canonicalisation the select must pick exactly the compared
     // values, in compare order.
7256 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
7257 return false;
7258 // Decide what type of max/min this should be based off of the predicate.
     // An opcode of 0 means no suitable opcode; the chosen opcode must also
     // be legal for DstTy.
7259 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
7260 if (!Opc || !isLegal({Opc, {DstTy}}))
7261 return false;
7262 // Comparisons between signed zero and zero may have different results...
7263 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
7264 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
7265 // We don't know if a comparison between two 0s will give us a consistent
7266 // result. Be conservative and only proceed if at least one side is
7267 // non-zero.
     // "Known non-zero" here means: is a floating-point constant whose
     // value is non-zero. The LHS is checked first, then the RHS.
7268 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
7269 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
7270 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
7271 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
7272 return false;
7273 }
7274 }
     // Everything is captured by value; the callback emits
     // Dst = Opc CmpLHS, CmpRHS.
7275 MatchInfo = [=](MachineIRBuilder &B) {
7276 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
7277 };
7278 return true;
7279}
7280
7282 BuildFnTy &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
     //
     // Entry point for the select-to-min/max combine on a G_SELECT: unpacks
     // the select's operands (looking through a single-use G_TRUNC on the
     // condition) and forwards to matchFPSelectToMinMax.
7283 // TODO: Handle integer cases.
7284 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
7285 // Condition may be fed by a truncated compare.
7286 Register Cond = MI.getOperand(1).getReg();
7287 Register MaybeTrunc;
     // Only a truncate with exactly one non-debug use is looked through.
7288 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
7289 Cond = MaybeTrunc;
     // Operand 0 is the result, operand 2 the true value, operand 3 the false
     // value.
7290 Register Dst = MI.getOperand(0).getReg();
7291 Register TrueVal = MI.getOperand(2).getReg();
7292 Register FalseVal = MI.getOperand(3).getReg();
7293 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
7294}
7295
7297 BuildFnTy &MatchInfo) const {
     // NOTE(review): the opening line(s) of this signature (function name and
     // leading parameter(s)) are absent from this listing; `MI`, used below, is
     // presumably the omitted first parameter -- confirm against the full file.
     //
     // Matches an equality G_ICMP between X and an add/sub/xor that has X as
     // an operand, and records a callback that compares the other operand (Y)
     // against zero instead.
7298 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
7299 // (X + Y) == X --> Y == 0
7300 // (X + Y) != X --> Y != 0
7301 // (X - Y) == X --> Y == 0
7302 // (X - Y) != X --> Y != 0
7303 // (X ^ Y) == X --> Y == 0
7304 // (X ^ Y) != X --> Y != 0
7305 Register Dst = MI.getOperand(0).getReg();
7306 CmpInst::Predicate Pred;
7307 Register X, Y, OpLHS, OpRHS;
     // The patterns are matched starting from Dst, MI's result register.
     // First try the subtraction form, with the compare operands in either
     // order.
7308 bool MatchedSub = mi_match(
7309 Dst, MRI,
7310 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
     // For a subtraction only the minuend may be X (X - Y, not Y - X).
7311 if (MatchedSub && X != OpLHS)
7312 return false;
7313 if (!MatchedSub) {
     // Otherwise try add / xor, where X may be either operand.
7314 if (!mi_match(Dst, MRI,
7315 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7316 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7317 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7318 return false;
     // Y is whichever operand is not X; it stays a default-constructed
     // Register when X is neither operand.
7319 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7320 }
     // Note: MatchInfo is assigned before the final validity check below, so
     // it may be overwritten even when false is returned.
7321 MatchInfo = [=](MachineIRBuilder &B) {
7322 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7323 B.buildICmp(Pred, Dst, Y, Zero);
7324 };
     // Succeed only for an equality predicate and when a Y was identified.
7325 return CmpInst::isEquality(Pred) && Y.isValid();
7326}
7327
7328/// Return the minimum useless shift amount that results in complete loss of the
7329/// source value. Return std::nullopt when it cannot determine a value.
7330static std::optional<unsigned>
7331getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7332 std::optional<int64_t> &Result) {
7333 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7334 Opcode == TargetOpcode::G_ASHR) &&
7335 "Expect G_SHL, G_LSHR or G_ASHR.");
7336 auto SignificantBits = 0;
7337 switch (Opcode) {
7338 case TargetOpcode::G_SHL:
7339 SignificantBits = ValueKB.countMinTrailingZeros();
7340 Result = 0;
7341 break;
7342 case TargetOpcode::G_LSHR:
7343 Result = 0;
7344 SignificantBits = ValueKB.countMinLeadingZeros();
7345 break;
7346 case TargetOpcode::G_ASHR:
7347 if (ValueKB.isNonNegative()) {
7348 SignificantBits = ValueKB.countMinLeadingZeros();
7349 Result = 0;
7350 } else if (ValueKB.isNegative()) {
7351 SignificantBits = ValueKB.countMinLeadingOnes();
7352 Result = -1;
7353 } else {
7354 // Cannot determine shift result.
7355 Result = std::nullopt;
7356 }
7357 break;
7358 default:
7359 break;
7360 }
7361 return ValueKB.getBitWidth() - SignificantBits;
7362}
7363
7365 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
     // NOTE(review): the line carrying the function name (listing line 7364) is
     // absent from this listing.
     //
     // Matches a shift whose constant shift amount (checked through
     // matchUnaryPredicate on operand 2) is large enough for the predicate
     // below to hold. MatchInfo is written by the predicate: std::nullopt when
     // the amount is at least the scalar bit width, otherwise whatever
     // getMinUselessShift stores.
7366 Register ShiftVal = MI.getOperand(1).getReg();
7367 Register ShiftReg = MI.getOperand(2).getReg();
7368 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
7369 auto IsShiftTooBig = [&](const Constant *C) {
     // Only integer constants are considered.
7370 auto *CI = dyn_cast<ConstantInt>(C);
7371 if (!CI)
7372 return false;
     // Shift amount >= scalar bit width of the result type.
7373 if (CI->uge(ResTy.getScalarSizeInBits())) {
7374 MatchInfo = std::nullopt;
7375 return true;
7376 }
     // Otherwise use the known bits of the shifted value to find the
     // smallest amount beyond which the result is fully determined.
7377 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7378 MI.getOpcode(), MatchInfo);
7379 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7380 };
7381 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7382}
7383
7385 unsigned LHSOpndIdx = 1;
     // NOTE(review): the whole signature line of this function is absent from
     // this listing; `MI`, used below, is presumably its parameter -- confirm
     // against the full file.
     //
     // Returns true when the instruction's left-hand source is an integer
     // constant (or a G_CONSTANT_FOLD_BARRIER) while its right-hand source is
     // neither, i.e. when swapping the two would move the constant to the
     // right.
7386 unsigned RHSOpndIdx = 2;
     // The overflow opcodes listed here take their sources from operands 2
     // and 3 instead of 1 and 2.
7387 switch (MI.getOpcode()) {
7388 case TargetOpcode::G_UADDO:
7389 case TargetOpcode::G_SADDO:
7390 case TargetOpcode::G_UMULO:
7391 case TargetOpcode::G_SMULO:
7392 LHSOpndIdx = 2;
7393 RHSOpndIdx = 3;
7394 break;
7395 default:
7396 break;
7397 }
7398 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7399 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7400 if (!getIConstantVRegVal(LHS, MRI)) {
7401 // Skip commuting if LHS is not a constant. But, LHS may be a
7402 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7403 // have a constant on the RHS.
7404 if (MRI.getVRegDef(LHS)->getOpcode() !=
7405 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7406 return false;
7407 }
7408 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7409 return MRI.getVRegDef(RHS)->getOpcode() !=
7410 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7411 !getIConstantVRegVal(RHS, MRI);
7412}
7413
7415 Register LHS = MI.getOperand(1).getReg();
     // NOTE(review): the whole signature line of this function is absent from
     // this listing; `MI`, used here, is presumably its parameter -- confirm
     // against the full file.
     //
     // Returns true when operand 1 matches m_GFCstOrSplat (a floating-point
     // constant or constant splat) and operand 2 does not.
7416 Register RHS = MI.getOperand(2).getReg();
     // ValAndVReg only serves as the binding target of the matcher; its value
     // is not read afterwards.
7417 std::optional<FPValueAndVReg> ValAndVReg;
7418 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7419 return false;
7420 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7421}
7422
7424 Observer.changingInstr(MI);
     // NOTE(review): the whole signature line of this function is absent from
     // this listing; `MI`, used here, is presumably its parameter -- confirm
     // against the full file.
     //
     // Swaps the registers of the instruction's two source operands in place.
     // The observer is notified before (above) and after (at the end of) the
     // modification.
7425 unsigned LHSOpndIdx = 1;
7426 unsigned RHSOpndIdx = 2;
     // The overflow opcodes listed here take their sources from operands 2
     // and 3 instead of 1 and 2.
7427 switch (MI.getOpcode()) {
7428 case TargetOpcode::G_UADDO:
7429 case TargetOpcode::G_SADDO:
7430 case TargetOpcode::G_UMULO:
7431 case TargetOpcode::G_SMULO:
7432 LHSOpndIdx = 2;
7433 RHSOpndIdx = 3;
7434 break;
7435 default:
7436 break;
7437 }
     // Read both registers before writing either, then exchange them.
7438 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7439 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7440 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7441 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7442 Observer.changedInstr(MI);
7443}
7444
7445bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7446 LLT SrcTy = MRI.getType(Src);
7447 if (SrcTy.isFixedVector())
7448 return isConstantSplatVector(Src, 1, AllowUndefs);
7449 if (SrcTy.isScalar()) {
7450 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7451 return true;
7452 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7453 return IConstant && IConstant->Value == 1;
7454 }
7455 return false; // scalable vector
7456}
7457
7458bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7459 LLT SrcTy = MRI.getType(Src);
7460 if (SrcTy.isFixedVector())
7461 return isConstantSplatVector(Src, 0, AllowUndefs);
7462 if (SrcTy.isScalar()) {
7463 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7464 return true;
7465 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7466 return IConstant && IConstant->Value == 0;
7467 }
7468 return false; // scalable vector
7469}
7470
// Returns true if Src is defined by a G_BUILD_VECTOR whose every source is
// the integer constant SplatValue. When AllowUndefs is set, sources that are
// implicit defs are skipped; otherwise a single implicit def makes the match
// fail. Returns false if Src is not a G_BUILD_VECTOR.
// NOTE(review): the initializer expressions of ImplicitDef and IConstant are
// absent from this listing; presumably they look up source I of BuildVector.
7471// Ignores COPYs during conformance checks.
7472// FIXME scalable vectors.
7473bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7474 bool AllowUndefs) const {
7475 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7476 if (!BuildVector)
7477 return false;
7478 unsigned NumSources = BuildVector->getNumSources();
7479
7480 for (unsigned I = 0; I < NumSources; ++I) {
7481 GImplicitDef *ImplicitDef =
7483 if (ImplicitDef && AllowUndefs)
7484 continue;
7485 if (ImplicitDef && !AllowUndefs)
7486 return false;
7487 std::optional<ValueAndVReg> IConstant =
7489 if (IConstant && IConstant->Value == SplatValue)
7490 continue;
// Any source that is not the requested constant rejects the whole vector.
7491 return false;
7492 }
7493 return true;
7494}
7495
// Returns the integer constant that Src evaluates to: either a scalar
// constant found by getIConstantVRegValWithLookThrough, or the common value
// of all sources of a G_BUILD_VECTOR. Returns std::nullopt when Src is
// neither, when any source is not a constant, or when the sources differ.
// NOTE(review): the initializer of the per-source IConstant is absent from
// this listing; presumably it looks up source I of BuildVector.
7496// Ignores COPYs during lookups.
7497// FIXME scalable vectors
7498std::optional<APInt>
7499CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7500 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7501 if (IConstant)
7502 return IConstant->Value;
7503
7504 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7505 if (!BuildVector)
7506 return std::nullopt;
7507 unsigned NumSources = BuildVector->getNumSources();
7508
// Value holds the first constant seen; every later source must equal it.
7509 std::optional<APInt> Value = std::nullopt;
7510 for (unsigned I = 0; I < NumSources; ++I) {
7511 std::optional<ValueAndVReg> IConstant =
7513 if (!IConstant)
7514 return std::nullopt;
7515 if (!Value)
7516 Value = IConstant->Value;
7517 else if (*Value != IConstant->Value)
7518 return std::nullopt;
7519 }
7520 return Value;
7521}
7522
// Returns true if Src is an integer constant, or a G_BUILD_VECTOR whose
// sources are all integer constants (they need not be equal). Returns false
// otherwise.
// NOTE(review): the initializer of the per-source IConstant is absent from
// this listing; presumably it looks up source I of BuildVector.
7523// FIXME G_SPLAT_VECTOR
7524bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7525 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7526 if (IConstant)
7527 return true;
7528
7529 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7530 if (!BuildVector)
7531 return false;
7532
7533 unsigned NumSources = BuildVector->getNumSources();
7534 for (unsigned I = 0; I < NumSources; ++I) {
7535 std::optional<ValueAndVReg> IConstant =
7537 if (!IConstant)
7538 return false;
7539 }
7540 return true;
7541}
7542
// Tries to replace a select between two integer constants with cheaper
// arithmetic on the (extended) condition. Only applies when the condition is
// a scalar s1 and the selected type is not a pointer. On success MatchInfo is
// set to a callback that emits the replacement before Select, and true is
// returned; otherwise returns false and leaves MatchInfo untouched.
// The patterns are tried in the order written; the first that matches wins.
// NOTE(review): the initializers of TrueOpt and FalseOpt are absent from this
// listing; presumably they look up the constant values of True and False.
7543// TODO: use knownbits to determine zeros
7544bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7545 BuildFnTy &MatchInfo) const {
7546 uint32_t Flags = Select->getFlags();
7547 Register Dest = Select->getReg(0);
7548 Register Cond = Select->getCondReg();
7549 Register True = Select->getTrueReg();
7550 Register False = Select->getFalseReg();
7551 LLT CondTy = MRI.getType(Select->getCondReg());
7552 LLT TrueTy = MRI.getType(Select->getTrueReg());
7553
7554 // We only do this combine for scalar boolean conditions.
7555 if (CondTy != LLT::scalar(1))
7556 return false;
7557
7558 if (TrueTy.isPointer())
7559 return false;
7560
7561 // Both are scalars.
7562 std::optional<ValueAndVReg> TrueOpt =
7564 std::optional<ValueAndVReg> FalseOpt =
7566
7567 if (!TrueOpt || !FalseOpt)
7568 return false;
7569
7570 APInt TrueValue = TrueOpt->Value;
7571 APInt FalseValue = FalseOpt->Value;
7572
7573 // select Cond, 1, 0 --> zext (Cond)
7574 if (TrueValue.isOne() && FalseValue.isZero()) {
7575 MatchInfo = [=](MachineIRBuilder &B) {
7576 B.setInstrAndDebugLoc(*Select);
7577 B.buildZExtOrTrunc(Dest, Cond);
7578 };
7579 return true;
7580 }
7581
7582 // select Cond, -1, 0 --> sext (Cond)
7583 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7584 MatchInfo = [=](MachineIRBuilder &B) {
7585 B.setInstrAndDebugLoc(*Select);
7586 B.buildSExtOrTrunc(Dest, Cond);
7587 };
7588 return true;
7589 }
7590
7591 // select Cond, 0, 1 --> zext (!Cond)
7592 if (TrueValue.isZero() && FalseValue.isOne()) {
7593 MatchInfo = [=](MachineIRBuilder &B) {
7594 B.setInstrAndDebugLoc(*Select);
7595 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7596 B.buildNot(Inner, Cond);
7597 B.buildZExtOrTrunc(Dest, Inner);
7598 };
7599 return true;
7600 }
7601
7602 // select Cond, 0, -1 --> sext (!Cond)
7603 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7604 MatchInfo = [=](MachineIRBuilder &B) {
7605 B.setInstrAndDebugLoc(*Select);
7606 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7607 B.buildNot(Inner, Cond);
7608 B.buildSExtOrTrunc(Dest, Inner);
7609 };
7610 return true;
7611 }
7612
// The zero-extended condition is 1 when true, so adding it to the false
// value (C1-1) yields C1.
7613 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7614 if (TrueValue - 1 == FalseValue) {
7615 MatchInfo = [=](MachineIRBuilder &B) {
7616 B.setInstrAndDebugLoc(*Select);
7617 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7618 B.buildZExtOrTrunc(Inner, Cond);
7619 B.buildAdd(Dest, Inner, False);
7620 };
7621 return true;
7622 }
7623
// The sign-extended condition is -1 when true, so adding it to the false
// value (C1+1) yields C1.
7624 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7625 if (TrueValue + 1 == FalseValue) {
7626 MatchInfo = [=](MachineIRBuilder &B) {
7627 B.setInstrAndDebugLoc(*Select);
7628 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7629 B.buildSExtOrTrunc(Inner, Cond);
7630 B.buildAdd(Dest, Inner, False);
7631 };
7632 return true;
7633 }
7634
7635 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7636 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7637 MatchInfo = [=](MachineIRBuilder &B) {
7638 B.setInstrAndDebugLoc(*Select);
7639 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7640 B.buildZExtOrTrunc(Inner, Cond);
7641 // The shift amount must be scalar.
7642 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7643 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
7644 B.buildShl(Dest, Inner, ShAmtC, Flags);
7645 };
7646 return true;
7647 }
7648
7649 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7650 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7651 MatchInfo = [=](MachineIRBuilder &B) {
7652 B.setInstrAndDebugLoc(*Select);
7653 Register Not = MRI.createGenericVirtualRegister(CondTy);
7654 B.buildNot(Not, Cond);
7655 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7656 B.buildZExtOrTrunc(Inner, Not);
7657 // The shift amount must be scalar.
7658 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7659 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7660 B.buildShl(Dest, Inner, ShAmtC, Flags);
7661 };
7662 return true;
7663 }
7664
7665 // select Cond, -1, C --> or (sext Cond), C
7666 if (TrueValue.isAllOnes()) {
7667 MatchInfo = [=](MachineIRBuilder &B) {
7668 B.setInstrAndDebugLoc(*Select);
7669 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7670 B.buildSExtOrTrunc(Inner, Cond);
7671 B.buildOr(Dest, Inner, False, Flags);
7672 };
7673 return true;
7674 }
7675
7676 // select Cond, C, -1 --> or (sext (not Cond)), C
7677 if (FalseValue.isAllOnes()) {
7678 MatchInfo = [=](MachineIRBuilder &B) {
7679 B.setInstrAndDebugLoc(*Select);
7680 Register Not = MRI.createGenericVirtualRegister(CondTy);
7681 B.buildNot(Not, Cond);
7682 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7683 B.buildSExtOrTrunc(Inner, Not);
7684 B.buildOr(Dest, Inner, True, Flags);
7685 };
7686 return true;
7687 }
7688
7689 return false;
7690}
7691
7692// TODO: use knownbits to determine zeros
7693bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7694 BuildFnTy &MatchInfo) const {
7695 uint32_t Flags = Select->getFlags();
7696 Register DstReg = Select->getReg(0);
7697 Register Cond = Select->getCondReg();
7698 Register True = Select->getTrueReg();
7699 Register False = Select->getFalseReg();
7700 LLT CondTy = MRI.getType(Select->getCondReg());
7701 LLT TrueTy = MRI.getType(Select->getTrueReg());
7702
7703 // Boolean or fixed vector of booleans.
7704 if (CondTy.isScalableVector() ||
7705 (CondTy.isFixedVector() &&
7706 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7707 CondTy.getScalarSizeInBits() != 1)
7708 return false;
7709
7710 if (CondTy != TrueTy)
7711 return false;
7712
7713 // select Cond, Cond, F --> or Cond, F
7714 // select Cond, 1, F --> or Cond, F
7715 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7716 MatchInfo = [=](MachineIRBuilder &B) {
7717 B.setInstrAndDebugLoc(*Select);
7718 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7719 B.buildZExtOrTrunc(Ext, Cond);
7720 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7721 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7722 };
7723 return true;
7724 }
7725
7726 // select Cond, T, Cond --> and Cond, T
7727 // select Cond, T, 0 --> and Cond, T
7728 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7729 MatchInfo = [=](MachineIRBuilder &B) {
7730 B.setInstrAndDebugLoc(*Select);
7731 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7732 B.buildZExtOrTrunc(Ext, Cond);
7733 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7734 B.buildAnd(DstReg, Ext, FreezeTrue);
7735 };
7736 return true;
7737 }
7738
7739 // select Cond, T, 1 --> or (not Cond), T
7740 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7741 MatchInfo = [=](MachineIRBuilder &B) {
7742 B.setInstrAndDebugLoc(*Select);
7743 // First the not.
7744 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7745 B.buildNot(Inner, Cond);
7746 // Then an ext to match the destination register.
7747 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7748 B.buildZExtOrTrunc(Ext, Inner);
7749 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7750 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7751 };
7752 return true;
7753 }
7754
7755 // select Cond, 0, F --> and (not Cond), F
7756 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7757 MatchInfo = [=](MachineIRBuilder &B) {
7758 B.setInstrAndDebugLoc(*Select);
7759 // First the not.
7760 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7761 B.buildNot(Inner, Cond);
7762 // Then an ext to match the destination register.
7763 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7764 B.buildZExtOrTrunc(Ext, Inner);
7765 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7766 B.buildAnd(DstReg, Ext, FreezeFalse);
7767 };
7768 return true;
7769 }
7770
7771 return false;
7772}
7773
// Matches `(icmp Pred X, Y) ? X : Y` (also with the compare operands swapped)
// and, for non-equality predicates, sets MatchInfo to build the corresponding
// G_UMAX / G_SMAX / G_UMIN / G_SMIN into the select's destination. Fails for
// pointer (vector) results, when the compare has other non-debug users, or
// when the min/max opcode is not legal for the result type.
// NOTE(review): the first signature line is absent from this listing; MO is
// presumably an operand whose register is defined by the G_SELECT -- confirm.
// NOTE(review): both cast<> calls assume the caller has already established
// the G_SELECT / G_ICMP shape.
7775 BuildFnTy &MatchInfo) const {
7776 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7777 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7778
7779 Register DstReg = Select->getReg(0);
7780 Register True = Select->getTrueReg();
7781 Register False = Select->getFalseReg();
7782 LLT DstTy = MRI.getType(DstReg);
7783
7784 if (DstTy.isPointerOrPointerVector())
7785 return false;
7786
7787 // We want to fold the icmp and replace the select.
7788 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7789 return false;
7790
7791 CmpInst::Predicate Pred = Cmp->getCond();
7792 // We need a larger or smaller predicate for
7793 // canonicalization.
7794 if (CmpInst::isEquality(Pred))
7795 return false;
7796
7797 Register CmpLHS = Cmp->getLHSReg();
7798 Register CmpRHS = Cmp->getRHSReg();
7799
7800 // We can swap CmpLHS and CmpRHS for higher hitrate.
7801 if (True == CmpRHS && False == CmpLHS) {
7802 std::swap(CmpLHS, CmpRHS);
7803 Pred = CmpInst::getSwappedPredicate(Pred);
7804 }
7805
7806 // (icmp X, Y) ? X : Y -> integer minmax.
7807 // see matchSelectPattern in ValueTracking.
7808 // Legality between G_SELECT and integer minmax can differ.
7809 if (True != CmpLHS || False != CmpRHS)
7810 return false;
7811
// Strict and non-strict predicates map to the same min/max opcode.
7812 switch (Pred) {
7813 case ICmpInst::ICMP_UGT:
7814 case ICmpInst::ICMP_UGE: {
7815 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7816 return false;
7817 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7818 return true;
7819 }
7820 case ICmpInst::ICMP_SGT:
7821 case ICmpInst::ICMP_SGE: {
7822 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7823 return false;
7824 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7825 return true;
7826 }
7827 case ICmpInst::ICMP_ULT:
7828 case ICmpInst::ICMP_ULE: {
7829 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7830 return false;
7831 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7832 return true;
7833 }
7834 case ICmpInst::ICMP_SLT:
7835 case ICmpInst::ICMP_SLE: {
7836 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7837 return false;
7838 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7839 return true;
7840 }
7841 default:
7842 return false;
7843 }
7844}
7845
// Matches a G_SUB that negates a single-use integer min/max of X and (neg X)
// and sets MatchInfo to build the inverse min/max opcode on the same two
// operands directly into the destination. Only fires when the new opcode is
// legal for the destination type.
// NOTE(review): the first signature line is absent from this listing; confirm
// it against the real file.
7846// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
7848 BuildFnTy &MatchInfo) const {
7849 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7850 Register DestReg = MI.getOperand(0).getReg();
7851 LLT DestTy = MRI.getType(DestReg);
7852
7853 Register X;
7854 Register Sub0;
// NegPattern matches `neg X` for the X already bound by the first operand and
// also captures that negated register as Sub0.
7855 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7856 if (mi_match(DestReg, MRI,
7857 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7858 m_GSMax(m_Reg(X), NegPattern),
7859 m_GUMin(m_Reg(X), NegPattern),
7860 m_GUMax(m_Reg(X), NegPattern)))))) {
// Operand 2 of the matched G_SUB is the min/max being negated.
7861 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7862 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7863 if (isLegal({NewOpc, {DestTy}})) {
7864 MatchInfo = [=](MachineIRBuilder &B) {
7865 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7866 };
7867 return true;
7868 }
7869 }
7870
7871 return false;
7872}
7873
7876
// Tries the select folds in order: constants first, then boolean logic.
// Returns true as soon as one of them sets MatchInfo.
// NOTE(review): the signature and the declaration of Select are absent from
// this listing; confirm them against the real file.
7877 if (tryFoldSelectOfConstants(Select, MatchInfo))
7878 return true;
7879
7880 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7881 return true;
7882
7883 return false;
7884}
7885
7886/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7887/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7888/// into a single comparison using range-based reasoning.
7889/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
///
/// \p Logic must be a G_AND or G_OR (asserted not to be G_XOR). Both operands
/// must be single-use G_ICMPs against constants on the same register, after
/// optionally looking through an add of a constant. On success \p MatchInfo
/// is set to build the combined compare, zero-extended or truncated into the
/// destination of \p Logic, and true is returned.
/// NOTE(review): the initializers of MaybeC1, MaybeC2, MaybeOffset1 and
/// MaybeOffset2 are absent from this listing; presumably they look up the
/// constant RHS of the respective compare / add -- confirm.
7890bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7891 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7892 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7893 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7894 Register DstReg = Logic->getReg(0);
7895 Register LHS = Logic->getLHSReg();
7896 Register RHS = Logic->getRHSReg();
7897 unsigned Flags = Logic->getFlags();
7898
7899 // We need a G_ICMP on the LHS register.
7900 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7901 if (!Cmp1)
7902 return false;
7903
7904 // We need a G_ICMP on the RHS register.
7905 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7906 if (!Cmp2)
7907 return false;
7908
7909 // We want to fold the icmps.
7910 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7911 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7912 return false;
7913
7914 APInt C1;
7915 APInt C2;
7916 std::optional<ValueAndVReg> MaybeC1 =
7918 if (!MaybeC1)
7919 return false;
7920 C1 = MaybeC1->Value;
7921
7922 std::optional<ValueAndVReg> MaybeC2 =
7924 if (!MaybeC2)
7925 return false;
7926 C2 = MaybeC2->Value;
7927
7928 Register R1 = Cmp1->getLHSReg();
7929 Register R2 = Cmp2->getLHSReg();
7930 CmpInst::Predicate Pred1 = Cmp1->getCond();
7931 CmpInst::Predicate Pred2 = Cmp2->getCond();
7932 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7933 LLT CmpOperandTy = MRI.getType(R1);
7934
7935 if (CmpOperandTy.isPointer())
7936 return false;
7937
7938 // We build ands, adds, and constants of type CmpOperandTy.
7939 // They must be legal to build.
7940 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7941 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7942 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7943 return false;
7944
7945 // Look through add of a constant offset on R1, R2, or both operands. This
7946 // allows us to interpret the R + C' < C'' range idiom into a proper range.
7947 std::optional<APInt> Offset1;
7948 std::optional<APInt> Offset2;
7949 if (R1 != R2) {
7950 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7951 std::optional<ValueAndVReg> MaybeOffset1 =
7953 if (MaybeOffset1) {
7954 R1 = Add->getLHSReg();
7955 Offset1 = MaybeOffset1->Value;
7956 }
7957 }
7958 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7959 std::optional<ValueAndVReg> MaybeOffset2 =
7961 if (MaybeOffset2) {
7962 R2 = Add->getLHSReg();
7963 Offset2 = MaybeOffset2->Value;
7964 }
7965 }
7966 }
7967
// After the optional look-through both compares must test the same register.
7968 if (R1 != R2)
7969 return false;
7970
// For an AND the ranges of the inverted predicates are used; the combined
// range is inverted back further down.
7971 // We calculate the icmp ranges including maybe offsets.
7972 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7973 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7974 if (Offset1)
7975 CR1 = CR1.subtract(*Offset1);
7976
7977 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7978 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7979 if (Offset2)
7980 CR2 = CR2.subtract(*Offset2);
7981
7982 bool CreateMask = false;
7983 APInt LowerDiff;
7984 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7985 if (!CR) {
7986 // We need non-wrapping ranges.
7987 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7988 return false;
7989
7990 // Check whether we have equal-size ranges that only differ by one bit.
7991 // In that case we can apply a mask to map one range onto the other.
7992 LowerDiff = CR1.getLower() ^ CR2.getLower();
7993 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7994 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7995 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7996 CR1Size != CR2.getUpper() - CR2.getLower())
7997 return false;
7998
// Keep the range with the smaller lower bound; the mask clears the differing
// bit so values of the other range land in it.
7999 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
8000 CreateMask = true;
8001 }
8002
8003 if (IsAnd)
8004 CR = CR->inverse();
8005
8006 CmpInst::Predicate NewPred;
8007 APInt NewC, Offset;
8008 CR->getEquivalentICmp(NewPred, NewC, Offset);
8009
8010 // We take the result type of one of the original icmps, CmpTy, for
8011 // the icmp to be built. The operand type, CmpOperandTy, is used for
8012 // the other instructions and constants to be built. The types of
8013 // the parameters and output are the same for add and and. CmpTy
8014 // and the type of DstReg might differ. That is why we zext or trunc
8015 // the icmp into the destination register.
8016
// The four branches cover every combination of "mask needed" and "offset
// needed"; the final else is therefore unreachable.
8017 MatchInfo = [=](MachineIRBuilder &B) {
8018 if (CreateMask && Offset != 0) {
8019 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
8020 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
8021 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
8022 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
8023 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8024 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
8025 B.buildZExtOrTrunc(DstReg, ICmp);
8026 } else if (CreateMask && Offset == 0) {
8027 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
8028 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
8029 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8030 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
8031 B.buildZExtOrTrunc(DstReg, ICmp);
8032 } else if (!CreateMask && Offset != 0) {
8033 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
8034 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
8035 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8036 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
8037 B.buildZExtOrTrunc(DstReg, ICmp);
8038 } else if (!CreateMask && Offset == 0) {
8039 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8040 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
8041 B.buildZExtOrTrunc(DstReg, ICmp);
8042 } else {
8043 llvm_unreachable("unexpected configuration of CreateMask and Offset");
8044 }
8045 };
8046 return true;
8047}
8048
// Folds a G_AND / G_OR of two G_FCMPs on the same operand pair (in either
// order) into one G_FCMP whose predicate is the bitwise and/or of the two
// predicate codes. Requires the logic op and both compares to be single-use
// and the compares to have matching operand types. On success MatchInfo is
// set and true is returned.
// NOTE(review): three lines are absent from this listing: the start of the
// legality `if` and the second halves of the FCMP_FALSE / FCMP_TRUE
// conditions; confirm them against the real file.
// NOTE(review): the assert message below reads "unexpecte xor" (typo); left
// untouched because it is a runtime string.
8049bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
8050 BuildFnTy &MatchInfo) const {
8051 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpecte xor");
8052 Register DestReg = Logic->getReg(0);
8053 Register LHS = Logic->getLHSReg();
8054 Register RHS = Logic->getRHSReg();
8055 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
8056
8057 // We need a compare on the LHS register.
8058 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
8059 if (!Cmp1)
8060 return false;
8061
8062 // We need a compare on the RHS register.
8063 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
8064 if (!Cmp2)
8065 return false;
8066
8067 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
8068 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
8069
8070 // We build one fcmp, want to fold the fcmps, replace the logic op,
8071 // and the fcmps must have the same shape.
8073 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
8074 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
8075 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
8076 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
8077 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
8078 return false;
8079
8080 CmpInst::Predicate PredL = Cmp1->getCond();
8081 CmpInst::Predicate PredR = Cmp2->getCond();
8082 Register LHS0 = Cmp1->getLHSReg();
8083 Register LHS1 = Cmp1->getRHSReg();
8084 Register RHS0 = Cmp2->getLHSReg();
8085 Register RHS1 = Cmp2->getRHSReg();
8086
8087 if (LHS0 == RHS1 && LHS1 == RHS0) {
8088 // Swap RHS operands to match LHS.
8089 PredR = CmpInst::getSwappedPredicate(PredR);
8090 std::swap(RHS0, RHS1);
8091 }
8092
8093 if (LHS0 == RHS0 && LHS1 == RHS1) {
8094 // We determine the new predicate.
8095 unsigned CmpCodeL = getFCmpCode(PredL);
8096 unsigned CmpCodeR = getFCmpCode(PredR);
8097 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
// The new compare carries the union of both compares' flags.
8098 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
8099 MatchInfo = [=](MachineIRBuilder &B) {
8100 // The fcmp predicates fill the lower part of the enum.
8101 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
8102 if (Pred == FCmpInst::FCMP_FALSE &&
8104 auto False = B.buildConstant(CmpTy, 0);
8105 B.buildZExtOrTrunc(DestReg, False);
8106 } else if (Pred == FCmpInst::FCMP_TRUE &&
8108 auto True =
8109 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
8110 CmpTy.isVector() /*isVector*/,
8111 true /*isFP*/));
8112 B.buildZExtOrTrunc(DestReg, True);
8113 } else { // We take the predicate without predicate optimizations.
8114 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
8115 B.buildZExtOrTrunc(DestReg, Cmp);
8116 }
8117 };
8118 return true;
8119 }
8120
8121 return false;
8122}
8123
// Tries the G_AND folds in order: the range-based icmp fold first, then the
// fcmp fold. Returns true as soon as one of them sets MatchInfo.
// NOTE(review): the signature line is absent from this listing; the cast<>
// assumes MI is a G_AND.
8125 GAnd *And = cast<GAnd>(&MI);
8126
8127 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
8128 return true;
8129
8130 if (tryFoldLogicOfFCmps(And, MatchInfo))
8131 return true;
8132
8133 return false;
8134}
8135
// Tries the G_OR folds in order: the range-based icmp fold first, then the
// fcmp fold. Returns true as soon as one of them sets MatchInfo.
// NOTE(review): the signature line is absent from this listing; the cast<>
// assumes MI is a G_OR.
8137 GOr *Or = cast<GOr>(&MI);
8138
8139 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
8140 return true;
8141
8142 if (tryFoldLogicOfFCmps(Or, MatchInfo))
8143 return true;
8144
8145 return false;
8146}
8147
// Simplifies an add-with-overflow (signed or unsigned, per Add->isSigned()).
// The folds are tried in this order:
//   1. carry result unused         -> plain add + undef carry
//   2. constant on the LHS only    -> same addo with operands swapped
//   3. both operands constant      -> constant result and constant carry
//   4. RHS is zero                 -> copy of LHS, carry 0
//   5. LHS is a nuw/nsw add of a constant whose sum with RHS does not
//      overflow                    -> addo of the inner LHS and the sum
//   6. overflow can be decided from known bits / sign bits
//                                  -> plain add and a constant carry
// On success MatchInfo is set and true is returned.
// NOTE(review): several lines are absent from this listing: the first
// signature line, the declaration of Add, the tail of two conditions, and the
// `case` labels of both switches. Confirm them against the real file.
8149 BuildFnTy &MatchInfo) const {
8151
8152 // Addo has no flags
8153 Register Dst = Add->getReg(0);
8154 Register Carry = Add->getReg(1);
8155 Register LHS = Add->getLHSReg();
8156 Register RHS = Add->getRHSReg();
8157 bool IsSigned = Add->isSigned();
8158 LLT DstTy = MRI.getType(Dst);
8159 LLT CarryTy = MRI.getType(Carry);
8160
8161 // Fold addo, if the carry is dead -> add, undef.
8162 if (MRI.use_nodbg_empty(Carry) &&
8163 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
8164 MatchInfo = [=](MachineIRBuilder &B) {
8165 B.buildAdd(Dst, LHS, RHS);
8166 B.buildUndef(Carry);
8167 };
8168 return true;
8169 }
8170
8171 // Canonicalize constant to RHS.
8172 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
8173 if (IsSigned) {
8174 MatchInfo = [=](MachineIRBuilder &B) {
8175 B.buildSAddo(Dst, Carry, RHS, LHS);
8176 };
8177 return true;
8178 }
8179 // !IsSigned
8180 MatchInfo = [=](MachineIRBuilder &B) {
8181 B.buildUAddo(Dst, Carry, RHS, LHS);
8182 };
8183 return true;
8184 }
8185
8186 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
8187 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
8188
8189 // Fold addo(c1, c2) -> c3, carry.
8190 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
8192 bool Overflow;
8193 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
8194 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
8195 MatchInfo = [=](MachineIRBuilder &B) {
8196 B.buildConstant(Dst, Result);
8197 B.buildConstant(Carry, Overflow);
8198 };
8199 return true;
8200 }
8201
8202 // Fold (addo x, 0) -> x, no carry
8203 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
8204 MatchInfo = [=](MachineIRBuilder &B) {
8205 B.buildCopy(Dst, LHS);
8206 B.buildConstant(Carry, 0);
8207 };
8208 return true;
8209 }
8210
8211 // Given 2 constant operands whose sum does not overflow:
8212 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
8213 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
8214 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
8215 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
8216 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
8217 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
8218 std::optional<APInt> MaybeAddRHS =
8219 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
8220 if (MaybeAddRHS) {
8221 bool Overflow;
8222 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
8223 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
8224 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
8225 if (IsSigned) {
8226 MatchInfo = [=](MachineIRBuilder &B) {
8227 auto ConstRHS = B.buildConstant(DstTy, NewC);
8228 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8229 };
8230 return true;
8231 }
8232 // !IsSigned
8233 MatchInfo = [=](MachineIRBuilder &B) {
8234 auto ConstRHS = B.buildConstant(DstTy, NewC);
8235 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8236 };
8237 return true;
8238 }
8239 }
// NOTE(review): the `;` after the closing brace below is a stray empty
// statement; it is harmless.
8240 };
8241
8242 // We try to combine addo to non-overflowing add.
8243 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
8245 return false;
8246
8247 // We try to combine uaddo to non-overflowing add.
8248 if (!IsSigned) {
8249 ConstantRange CRLHS =
8250 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
8251 ConstantRange CRRHS =
8252 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
8253
8254 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
8256 return false;
8258 MatchInfo = [=](MachineIRBuilder &B) {
8259 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8260 B.buildConstant(Carry, 0);
8261 };
8262 return true;
8263 }
8266 MatchInfo = [=](MachineIRBuilder &B) {
8267 B.buildAdd(Dst, LHS, RHS);
8268 B.buildConstant(Carry, 1);
8269 };
8270 return true;
8271 }
8272 }
8273 return false;
8274 }
8275
8276 // We try to combine saddo to non-overflowing add.
8277
8278 // If LHS and RHS each have at least two sign bits, then there is no signed
8279 // overflow.
8280 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
8281 MatchInfo = [=](MachineIRBuilder &B) {
8282 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8283 B.buildConstant(Carry, 0);
8284 };
8285 return true;
8286 }
8287
8288 ConstantRange CRLHS =
8289 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
8290 ConstantRange CRRHS =
8291 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
8292
8293 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
8295 return false;
8297 MatchInfo = [=](MachineIRBuilder &B) {
8298 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8299 B.buildConstant(Carry, 0);
8300 };
8301 return true;
8302 }
8305 MatchInfo = [=](MachineIRBuilder &B) {
8306 B.buildAdd(Dst, LHS, RHS);
8307 B.buildConstant(Carry, 1);
8308 };
8309 return true;
8310 }
8311 }
8312
8313 return false;
8314}
8315
// Runs the MatchInfo callback on Builder and then erases Root.
// NOTE(review): the first signature line and the declaration of Root are
// absent from this listing; Root is presumably the instruction defining the
// matched operand -- confirm against the real file.
8317 BuildFnTy &MatchInfo) const {
8319 MatchInfo(Builder);
8320 Root->eraseFromParent();
8321}
8322
// Match predicate for the powi expansion; reads whether the enclosing
// function is optimized for size.
// NOTE(review): the first signature line and the return statement are absent
// from this listing, so how OptForSize and Exponent are used cannot be seen
// here -- confirm against the real file.
8324 int64_t Exponent) const {
8325 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8327}
8328
8330 int64_t Exponent) const {
8331 auto [Dst, Base] = MI.getFirst2Regs();
8332 LLT Ty = MRI.getType(Dst);
8333 int64_t ExpVal = Exponent;
8334
8335 if (ExpVal == 0) {
8336 Builder.buildFConstant(Dst, 1.0);
8337 MI.removeFromParent();
8338 return;
8339 }
8340
8341 if (ExpVal < 0)
8342 ExpVal = -ExpVal;
8343
8344 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8345 // to generate the multiply sequence. There are more optimal ways to do this
8346 // (for example, powi(x,15) generates one more multiply than it should), but
8347 // this has the benefit of being both really simple and much better than a
8348 // libcall.
8349 std::optional<SrcOp> Res;
8350 SrcOp CurSquare = Base;
8351 while (ExpVal > 0) {
8352 if (ExpVal & 1) {
8353 if (!Res)
8354 Res = CurSquare;
8355 else
8356 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8357 }
8358
8359 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8360 ExpVal >>= 1;
8361 }
8362
8363 // If the original exponent was negative, invert the result, producing
8364 // 1/(x*x*x).
8365 if (Exponent < 0)
8366 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8367 MI.getFlags());
8368
8369 Builder.buildCopy(Dst, *Res);
8370 MI.eraseFromParent();
8371}
8372
// Matches (A + C1) - C2 where the inner add has a single non-debug use and
// sets MatchInfo to build A + (C1 - C2) into the sub's destination.
// NOTE(review): the first signature line is absent from this listing. The
// cast<> and getIConstantFromReg calls assume the caller already matched a
// G_SUB of a G_ADD with constant right-hand sides -- confirm.
8374 BuildFnTy &MatchInfo) const {
8375 // fold (A+C1)-C2 -> A+(C1-C2)
8376 const GSub *Sub = cast<GSub>(&MI);
8377 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8378
8379 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8380 return false;
8381
8382 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8383 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8384
8385 Register Dst = Sub->getReg(0);
8386 LLT DstTy = MRI.getType(Dst);
8387
8388 MatchInfo = [=](MachineIRBuilder &B) {
8389 auto Const = B.buildConstant(DstTy, C1 - C2);
8390 B.buildAdd(Dst, Add->getLHSReg(), Const);
8391 };
8392
8393 return true;
8394}
8395
// Matches C2 - (A + C1) where the inner add has a single non-debug use and
// sets MatchInfo to build (C2 - C1) - A into the sub's destination.
// NOTE(review): the first signature line is absent from this listing. The
// cast<> and getIConstantFromReg calls assume the caller already matched a
// G_SUB with a constant LHS and a G_ADD-with-constant RHS -- confirm.
8397 BuildFnTy &MatchInfo) const {
8398 // fold C2-(A+C1) -> (C2-C1)-A
8399 const GSub *Sub = cast<GSub>(&MI);
8400 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8401
8402 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8403 return false;
8404
8405 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8406 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8407
8408 Register Dst = Sub->getReg(0);
8409 LLT DstTy = MRI.getType(Dst);
8410
8411 MatchInfo = [=](MachineIRBuilder &B) {
8412 auto Const = B.buildConstant(DstTy, C2 - C1);
8413 B.buildSub(Dst, Const, Add->getLHSReg());
8414 };
8415
8416 return true;
8417}
8418
// Matches (A - C1) - C2 where the inner sub has a single non-debug use and
// sets MatchInfo to build A - (C1 + C2) into the outer sub's destination.
// NOTE(review): the first signature line is absent from this listing. The
// cast<> and getIConstantFromReg calls assume the caller already matched a
// G_SUB of a G_SUB with constant right-hand sides -- confirm.
8420 BuildFnTy &MatchInfo) const {
8421 // fold (A-C1)-C2 -> A-(C1+C2)
8422 const GSub *Sub1 = cast<GSub>(&MI);
8423 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8424
8425 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8426 return false;
8427
8428 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8429 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8430
8431 Register Dst = Sub1->getReg(0);
8432 LLT DstTy = MRI.getType(Dst);
8433
8434 MatchInfo = [=](MachineIRBuilder &B) {
8435 auto Const = B.buildConstant(DstTy, C1 + C2);
8436 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8437 };
8438
8439 return true;
8440}
8441
8443 BuildFnTy &MatchInfo) const {
8444 // fold (C1-A)-C2 -> (C1-C2)-A
     //
     // On success, MatchInfo is set to a builder callback that emits the folded
     // form; nothing is rewritten during matching.
     //
     // The casts below are unchecked: MI is taken to be a subtraction whose LHS
     // is defined by another subtraction (presumably guaranteed by the combine
     // rule that invokes this matcher -- TODO confirm against the rule
     // definition).
8445 const GSub *Sub1 = cast<GSub>(&MI);
8446 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8447
     // Give up unless the inner sub has exactly one non-debug use.
8448 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8449 return false;
8450
     // C2 is the RHS of the outer sub, C1 the LHS of the inner sub.
8451 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8452 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8453
8454 Register Dst = Sub1->getReg(0);
8455 LLT DstTy = MRI.getType(Dst);
8456
     // [=] copies the Sub2 pointer into the closure, which dereferences it when
     // the callback runs.
8457 MatchInfo = [=](MachineIRBuilder &B) {
     // Materialize C1 - C2 and write (C1 - C2) - A to the original destination.
8458 auto Const = B.buildConstant(DstTy, C1 - C2);
8459 B.buildSub(Dst, Const, Sub2->getRHSReg());
8460 };
8461
8462 return true;
8463}
8464
8466 BuildFnTy &MatchInfo) const {
8467 // fold ((A-C1)+C2) -> (A+(C2-C1))
     //
     // On success, MatchInfo is set to a builder callback that emits the folded
     // form; nothing is rewritten during matching.
     //
     // The casts below are unchecked: MI is taken to be an addition whose LHS is
     // defined by a subtraction (presumably guaranteed by the combine rule that
     // invokes this matcher -- TODO confirm against the rule definition).
8468 const GAdd *Add = cast<GAdd>(&MI);
8469 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8470
     // Give up unless the inner sub has exactly one non-debug use.
8471 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8472 return false;
8473
     // C2 is the RHS of the outer add, C1 the RHS of the inner sub.
8474 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8475 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8476
8477 Register Dst = Add->getReg(0);
8478 LLT DstTy = MRI.getType(Dst);
8479
     // [=] copies the Sub pointer into the closure, which dereferences it when
     // the callback runs.
8480 MatchInfo = [=](MachineIRBuilder &B) {
     // Materialize C2 - C1 and write A + (C2 - C1) to the original destination.
8481 auto Const = B.buildConstant(DstTy, C2 - C1);
8482 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8483 };
8484
8485 return true;
8486}
8487
8489 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
     // Matches an unmerge of an any-extended build vector and, on success, sets
     // MatchInfo to a callback that rebuilds each unmerge result as its own
     // small build vector of scalar any-extends (see the example below).
8490 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8491
     // The unmerge must be the only non-debug user of its source register.
8492 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8493 return false;
8494
8495 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8496
8497 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8498
8499 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8500 // $any:_(<8 x s16>) = G_ANYEXT $bv
8501 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8502 //
8503 // ->
8504 //
8505 // $any:_(s16) = G_ANYEXT $bv[0]
8506 // $any1:_(s16) = G_ANYEXT $bv[1]
8507 // $any2:_(s16) = G_ANYEXT $bv[2]
8508 // $any3:_(s16) = G_ANYEXT $bv[3]
8509 // $any4:_(s16) = G_ANYEXT $bv[4]
8510 // $any5:_(s16) = G_ANYEXT $bv[5]
8511 // $any6:_(s16) = G_ANYEXT $bv[6]
8512 // $any7:_(s16) = G_ANYEXT $bv[7]
8513 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8514 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8515
8516 // We want to unmerge into vectors.
8517 if (!DstTy.isFixedVector())
8518 return false;
8519
     // The unmerge source must be defined by an any-extend.
8520 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8521 if (!Any)
8522 return false;
8523
8524 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8525
8526 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8527 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8528
     // The build vector must have exactly one non-debug use as well.
8529 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8530 return false;
8531
8532 // FIXME: check element types?
     // The build-vector sources must divide evenly among the unmerge defs.
8533 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8534 return false;
8535
8536 LLT BigBvTy = MRI.getType(BV->getReg(0));
8537 LLT SmallBvTy = DstTy;
8538 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8539
8541 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8542 return false;
8543
8544 // We check the legality of scalar anyext.
8546 {TargetOpcode::G_ANYEXT,
8547 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8548 return false;
8549
     // [=] copies the Unmerge and BV pointers into the closure, which
     // dereferences them when the callback runs.
8550 MatchInfo = [=](MachineIRBuilder &B) {
8551 // Build into each G_UNMERGE_VALUES def
8552 // a small build vector with anyext from the source build vector.
8553 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
8555 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
     // Def I consumes the I-th consecutive run of
     // SmallBvTy.getNumElements() build-vector sources.
8556 Register SourceArray =
8557 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8558 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8559 Ops.push_back(AnyExt.getReg(0));
8560 }
     // Write the new build vector directly into the I-th unmerge result.
8561 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8562 };
8563 };
8564 return true;
8565 };
8566
     // The any-extend source is not a build vector: no match.
8567 return false;
8568}
8569
8571 BuildFnTy &MatchInfo) const {
8572
     // Builds a copy of the shuffle mask in which every index at or beyond the
     // element count of the first source is replaced by -1. If at least one
     // index was replaced, MatchInfo is set to a callback that re-emits the
     // shuffle with the new mask.
8573 bool Changed = false;
8574 auto &Shuffle = cast<GShuffleVector>(MI);
8575 ArrayRef<int> OrigMask = Shuffle.getMask();
8576 SmallVector<int, 16> NewMask;
8577 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
     // A non-vector first source is treated as having a single element.
8578 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8579 const unsigned NumDstElts = OrigMask.size();
8580 for (unsigned i = 0; i != NumDstElts; ++i) {
8581 int Idx = OrigMask[i];
     // Indices >= NumSrcElems are rewritten to -1; all others are kept.
8582 if (Idx >= (int)NumSrcElems) {
8583 Idx = -1;
8584 Changed = true;
8585 }
8586 NewMask.push_back(Idx);
8587 }
8588
     // The mask is already free of such indices: nothing to do.
8589 if (!Changed)
8590 return false;
8591
     // MI is captured by reference; NewMask is moved into the closure.
8592 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
     // Same destination and source operands as MI, with the rewritten mask.
8593 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8594 std::move(NewMask));
8595 };
8596
8597 return true;
8598}
8599
8600static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8601 const unsigned MaskSize = Mask.size();
8602 for (unsigned I = 0; I < MaskSize; ++I) {
8603 int Idx = Mask[I];
8604 if (Idx < 0)
8605 continue;
8606
8607 if (Idx < (int)NumElems)
8608 Mask[I] = Idx + NumElems;
8609 else
8610 Mask[I] = Idx - NumElems;
8611 }
8612}
8613
8615 BuildFnTy &MatchInfo) const {
8616
     // When the shuffle mask reads from only one of the two sources, sets
     // MatchInfo to a callback that re-emits the shuffle with that source as
     // the first operand and an undef value as the second.
8617 auto &Shuffle = cast<GShuffleVector>(MI);
8618 // If any of the two inputs is already undef, don't check the mask again to
8619 // prevent infinite loop
8620 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8621 return false;
8622
8623 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8624 return false;
8625
8626 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8627 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
8629 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8630 return false;
8631
8632 ArrayRef<int> Mask = Shuffle.getMask();
8633 const unsigned NumSrcElems = Src1Ty.getNumElements();
8634
     // Record which sources the mask reads from. Negative entries are skipped;
     // indices below NumSrcElems count as the first source, the rest as the
     // second.
8635 bool TouchesSrc1 = false;
8636 bool TouchesSrc2 = false;
8637 const unsigned NumElems = Mask.size();
8638 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8639 if (Mask[Idx] < 0)
8640 continue;
8641
8642 if (Mask[Idx] < (int)NumSrcElems)
8643 TouchesSrc1 = true;
8644 else
8645 TouchesSrc2 = true;
8646 }
8647
     // Continue only when exactly one source is referenced (both flags equal
     // means either both or neither source is used).
8648 if (TouchesSrc1 == TouchesSrc2)
8649 return false;
8650
     // If only the second source is used, make it the first operand and remap
     // the mask indices to match.
8651 Register NewSrc1 = Shuffle.getSrc1Reg();
8652 SmallVector<int, 16> NewMask(Mask);
8653 if (TouchesSrc2) {
8654 NewSrc1 = Shuffle.getSrc2Reg();
8655 commuteMask(NewMask, NumSrcElems);
8656 }
8657
     // Shuffle is captured by reference; everything else is copied.
8658 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
     // The unused operand becomes an undef of the first source's type.
8659 auto Undef = B.buildUndef(Src1Ty);
8660 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8661 };
8662
8663 return true;
8664}
8665
8667 BuildFnTy &MatchInfo) const {
     // Uses the known-bits ranges of both operands to decide whether a
     // subtract-with-overflow can be replaced by a plain subtraction plus a
     // constant overflow result; on success MatchInfo holds the rewrite.
8668 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8669
8670 Register Dst = Subo->getReg(0);
8671 Register LHS = Subo->getLHSReg();
8672 Register RHS = Subo->getRHSReg();
8673 Register Carry = Subo->getCarryOutReg();
8674 LLT DstTy = MRI.getType(Dst);
8675 LLT CarryTy = MRI.getType(Carry);
8676
8677 // Check legality before known bits.
8678 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8680 return false;
8681
     // Turn the known bits of each operand into a constant range, using the
     // signedness of the instruction being matched.
8682 ConstantRange KBLHS =
8683 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8684 /* IsSigned=*/Subo->isSigned());
8685 ConstantRange KBRHS =
8686 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8687 /* IsSigned=*/Subo->isSigned());
8688
8689 if (Subo->isSigned()) {
8690 // G_SSUBO
8691 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
8693 return false;
8695 MatchInfo = [=](MachineIRBuilder &B) {
     // Plain sub flagged no-signed-wrap; the carry-out becomes constant 0.
8696 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8697 B.buildConstant(Carry, 0);
8698 };
8699 return true;
8700 }
8703 MatchInfo = [=](MachineIRBuilder &B) {
     // Plain sub without wrap flags; the carry-out becomes the value
     // getICmpTrueVal reports for an integer compare of CarryTy's shape.
8704 B.buildSub(Dst, LHS, RHS);
8705 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8706 /*isVector=*/CarryTy.isVector(),
8707 /*isFP=*/false));
8708 };
8709 return true;
8710 }
8711 }
8712 return false;
8713 }
8714
8715 // G_USUBO
8716 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8718 return false;
8720 MatchInfo = [=](MachineIRBuilder &B) {
     // Plain sub flagged no-unsigned-wrap; the carry-out becomes constant 0.
8721 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8722 B.buildConstant(Carry, 0);
8723 };
8724 return true;
8725 }
8728 MatchInfo = [=](MachineIRBuilder &B) {
     // Plain sub without wrap flags; the carry-out becomes the value
     // getICmpTrueVal reports for an integer compare of CarryTy's shape.
8729 B.buildSub(Dst, LHS, RHS);
8730 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8731 /*isVector=*/CarryTy.isVector(),
8732 /*isFP=*/false));
8733 };
8734 return true;
8735 }
8736 }
8737
8738 return false;
8739}
8740
8741// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
8742// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1) -> (ctls x)
     //
     // On success, MatchInfo is set to a callback that emits the G_CTLS-based
     // replacement into the original destination register.
8744 BuildFnTy &MatchInfo) const {
8745 assert((CtlzMI.getOpcode() == TargetOpcode::G_CTLZ ||
8746 CtlzMI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON) &&
8747 "Expected G_CTLZ variant");
8748
8749 const Register Dst = CtlzMI.getOperand(0).getReg();
8750 Register Src = CtlzMI.getOperand(1).getReg();
8751
8752 LLT Ty = MRI.getType(Dst);
8753 LLT SrcTy = MRI.getType(Src);
8754
     // Only valid scalar result types are handled.
8755 if (!(Ty.isValid() && Ty.isScalar()))
8756 return false;
8757
     // Legality information is required for the query below.
8758 if (!LI)
8759 return false;
8760
     // Ask the legalizer how it would treat G_CTLS with these result/source
     // types; any action that falls through to `default` rejects the match.
8761 SmallVector<LLT, 2> QueryTypes = {Ty, SrcTy};
8762 LegalityQuery Query(TargetOpcode::G_CTLS, QueryTypes);
8763
8764 switch (LI->getAction(Query).Action) {
8765 default:
8766 return false;
8770 break;
8771 }
8772
8773 // Src = or(shl(V, 1), 1) -> Src=V; NeedAdd = False
8774 Register V;
8775 bool NeedAdd = true;
8776 if (mi_match(Src, MRI,
8778 m_SpecificICst(1))))) {
8779 NeedAdd = false;
8780 Src = V;
8781 }
8782
8783 unsigned BitWidth = Ty.getScalarSizeInBits();
8784
     // X receives the operand bound by the pattern; no match means no fold.
8785 Register X;
8786 if (!mi_match(Src, MRI,
8789 m_SpecificICst(BitWidth - 1)))))))
8790 return false;
8791
8792 MatchInfo = [=](MachineIRBuilder &B) {
     // Without the extra add, the CTLS result goes straight into Dst.
8793 if (!NeedAdd) {
8794 B.buildCTLS(Dst, X);
8795 return;
8796 }
8797
     // Otherwise compute CTLS into a fresh value and write CTLS + 1 into Dst.
8798 auto Ctls = B.buildCTLS(Ty, X);
8799 auto One = B.buildConstant(Ty, 1);
8800
8801 B.buildAdd(Dst, Ctls, One);
8802 };
8803
8804 return true;
8805}
8806
8807// Fold shr ( add ( ext X, ext Y ), 1 ) -> avgfloor ( x, y )
8808// Fold shr ( add ( ext X, ext Y, 1 ), 1 ) -> avgceil ( x, y )
     //
     // Returns true only when X and Y have the same type and TargetOpc is
     // reported legal for that type. MI is only inspected by the assertion.
8811 unsigned TargetOpc) const {
8812 assert((MI.getOpcode() == TargetOpcode::G_LSHR ||
8813 MI.getOpcode() == TargetOpcode::G_ASHR) &&
8814 "Expected G_LSHR/G_ASHR");
8815
8816 LLT XTy = MRI.getType(X);
8817 return XTy == MRI.getType(Y) && isLegal({TargetOpc, {XTy}});
8818}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may fold its address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass, or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
const fltSemantics & getSemantics() const
Definition APFloat.h:1546
bool isNaN() const
Definition APFloat.h:1536
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1294
APInt bitcastToAPInt() const
Definition APFloat.h:1430
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
int32_t exactLogBase2() const
Definition APInt.h:1806
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1084
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1317
bool isMask(unsigned numBits) const
Definition APInt.h:489
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyCombineBuildVectorOfBitcast(MachineInstr &MI, SmallVector< Register > &Ops) const
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRepeatedFPDivisor(MachineInstr &MI, SmallVector< MachineInstr * > &MatchInfo) const
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
void applyPtrAddZero(MachineInstr &MI) const
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands that are defined by equivalent instructions.
void applyUDivOrURemByConst(MachineInstr &MI) const
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const
bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchAVG(MachineInstr &MI, MachineRegisterInfo &MRI, Register X, Register Y, unsigned TargetOpc) const
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchPtrAddZero(MachineInstr &MI) const
}
const TargetInstrInfo * TII
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
void applyCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
const DataLayout & getDataLayout() const
bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
void applyUMulHToLShr(MachineInstr &MI) const
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
bool isLegalOrHasFewerElements(const LegalityQuery &Query) const
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
const TargetLowering & getTargetLowering() const
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
void applySDivByPow2(MachineInstr &MI) const
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
void applyUDivByPow2(MachineInstr &MI) const
Given a G_UDIV MI expressing an unsigned divide by a pow2 constant, return expressions that impleme...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI 's bitwidth.
void applyCombineCopy(MachineInstr &MI) const
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
bool matchSextTruncSextLoad(MachineInstr &MI) const
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) const
bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given a G_SDIV MI expressing a signed divide by a pow2 constant, return expressions that implement...
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchCombineCopy(MachineInstr &MI) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchOrShiftToFunnelShift(MachineInstr &MI, bool AllowScalarConstants, BuildFnTy &MatchInfo) const
bool matchRedundantSExtInReg(MachineInstr &MI) const
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
void applyCombineShuffleVector(MachineInstr &MI, ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVMContext & getContext() const
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
bool matchConstantFoldUnaryIntOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Constant fold a unary integer op (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON variants,...
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
bool isLegal(const LegalityQuery &Query) const
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
bool matchOperandIsKnownToBeAPowerOfTwo(const MachineOperand &MO, bool OrNegative=false) const
Check if operand MO is known to be a power of 2.
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate operands of a commutative binop.
void eraseInst(MachineInstr &MI) const
Erase MI.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given a G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is extend that consumes the result of a load, try to combine it.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
void applyRotateOutOfRange(MachineInstr &MI) const
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchRotateOutOfRange(MachineInstr &MI) const
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifySRemByPow2(MachineInstr &MI) const
Combine G_SREM x, (+/-2^k) to a bias-and-mask sequence.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &UnmergeSrc) const
bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
void applyFunnelShiftToRotate(MachineInstr &MI) const
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyRepeatedFPDivisor(SmallVector< MachineInstr * > &MatchInfo) const
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
bool matchBinopWithNeg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold a bitwiseop (~b +/- c) -> a bitwiseop ~(b -/+ c)
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applySDivOrSRemByConst(MachineInstr &MI) const
MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given a G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
void applyCommuteBinOpOperands(MachineInstr &MI) const
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
void applySextTruncSextLoad(MachineInstr &MI) const
const MachineFunction & getMachineFunction() const
bool matchCombineBuildVectorOfBitcast(MachineInstr &MI, SmallVector< Register > &Ops) const
Combine G_BUILD_VECTOR(G_UNMERGE(G_BITCAST), Undef) to G_BITCAST(G_BUILD_VECTOR(.....
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValue() const
Definition Constants.h:464
const APFloat & getValueAPF() const
Definition Constants.h:463
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:218
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:254
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
static use_instr_nodbg_iterator use_instr_nodbg_end()
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(const APInt &RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1444
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:1984
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:653
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:461
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1404
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1569
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:741
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1527
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1551
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:494
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1584
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1616
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:672
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:305
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1507
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:200
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1437
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:908
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:279
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:447
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1540
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1641
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:469
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI SmallVector< APInt > ConstantFoldUnaryIntOp(unsigned Opcode, LLT DstTy, Register Src, const MachineRegisterInfo &MRI)
Tries to constant fold a unary integer operation (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON vari...
Definition Utils.cpp:945
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:501
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1422
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:229
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:265
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...