LLVM 23.0.0git
CombinerHelper.cpp
Go to the documentation of this file.
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
42#include <cmath>
43#include <optional>
44#include <tuple>
45
46#define DEBUG_TYPE "gi-combiner"
47
48using namespace llvm;
49using namespace MIPatternMatch;
50
51// Option to allow testing of the combiner while no targets know about indexed
52// addressing.
53static cl::opt<bool>
54 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
55 cl::desc("Force all indexed operations to be "
56 "legal for the GlobalISel combiner"));
57
62 const LegalizerInfo *LI)
63 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
65 TII(Builder.getMF().getSubtarget().getInstrInfo()),
66 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
67 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
68 (void)this->VT;
69}
70
72 return *Builder.getMF().getSubtarget().getTargetLowering();
73}
74
76 return Builder.getMF();
77}
78
82
83LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
84
/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// For little endian the position equals \p I itself; \p ByteWidth is only
/// used to check that \p I is in range.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  // ByteWidth is otherwise unused when assertions are compiled out.
  (void)ByteWidth;
  return I;
}
93
94/// Determines the LogBase2 value for a non-null input value using the
95/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
97 auto &MRI = *MIB.getMRI();
98 LLT Ty = MRI.getType(V);
99 auto Ctlz = MIB.buildCTLZ(Ty, V);
100 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
101 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
102}
103
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// The position is mirrored within the type: byte \p I maps to
/// ByteWidth - 1 - I. \p I must be smaller than \p ByteWidth (asserted), which
/// also guarantees the subtraction cannot wrap.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  const unsigned LastByte = ByteWidth - 1;
  return LastByte - I;
}
112
113/// Given a map from byte offsets in memory to indices in a load/store,
114/// determine if that map corresponds to a little or big endian byte pattern.
115///
116/// \param MemOffset2Idx maps memory offsets to address offsets.
117/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
118///
119/// \returns true if the map corresponds to a big endian byte pattern, false if
120/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
121///
122/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
123/// are as follows:
124///
125/// AddrOffset Little endian Big endian
126/// 0 0 3
127/// 1 1 2
128/// 2 2 1
129/// 3 3 0
130static std::optional<bool>
132 int64_t LowestIdx) {
133 // Need at least two byte positions to decide on endianness.
134 unsigned Width = MemOffset2Idx.size();
135 if (Width < 2)
136 return std::nullopt;
137 bool BigEndian = true, LittleEndian = true;
138 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
139 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
140 if (MemOffsetAndIdx == MemOffset2Idx.end())
141 return std::nullopt;
142 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
143 assert(Idx >= 0 && "Expected non-negative byte offset?");
144 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
145 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
146 if (!BigEndian && !LittleEndian)
147 return std::nullopt;
148 }
149
150 assert((BigEndian != LittleEndian) &&
151 "Pattern cannot be both big and little endian!");
152 return BigEndian;
153}
154
156
157bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
158 assert(LI && "Must have LegalizerInfo to query isLegal!");
159 return LI->getAction(Query).Action == LegalizeActions::Legal;
160}
161
163 const LegalityQuery &Query) const {
164 return isPreLegalize() || isLegal(Query);
165}
166
168 return isLegal(Query) ||
169 LI->getAction(Query).Action == LegalizeActions::WidenScalar;
170}
171
173 const LegalityQuery &Query) const {
174 LegalizeAction Action = LI->getAction(Query).Action;
175 return Action == LegalizeActions::Legal ||
177}
178
180 if (!Ty.isVector())
181 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
182 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
183 if (isPreLegalize())
184 return true;
185 LLT EltTy = Ty.getElementType();
186 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
187 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
188}
189
191 Register ToReg) const {
192 Observer.changingAllUsesOfReg(MRI, FromReg);
193
194 if (MRI.constrainRegAttrs(ToReg, FromReg))
195 MRI.replaceRegWith(FromReg, ToReg);
196 else
197 Builder.buildCopy(FromReg, ToReg);
198
199 Observer.finishedChangingAllUsesOfReg();
200}
201
203 MachineOperand &FromRegOp,
204 Register ToReg) const {
205 assert(FromRegOp.getParent() && "Expected an operand in an MI");
206 Observer.changingInstr(*FromRegOp.getParent());
207
208 FromRegOp.setReg(ToReg);
209
210 Observer.changedInstr(*FromRegOp.getParent());
211}
212
214 unsigned ToOpcode) const {
215 Observer.changingInstr(FromMI);
216
217 FromMI.setDesc(Builder.getTII().get(ToOpcode));
218
219 Observer.changedInstr(FromMI);
220}
221
223 return RBI->getRegBank(Reg, MRI, *TRI);
224}
225
227 const RegisterBank *RegBank) const {
228 if (RegBank)
229 MRI.setRegBank(Reg, *RegBank);
230}
231
233 if (matchCombineCopy(MI)) {
235 return true;
236 }
237 return false;
238}
240 if (MI.getOpcode() != TargetOpcode::COPY)
241 return false;
242 Register DstReg = MI.getOperand(0).getReg();
243 Register SrcReg = MI.getOperand(1).getReg();
244 return canReplaceReg(DstReg, SrcReg, MRI);
245}
247 Register DstReg = MI.getOperand(0).getReg();
248 Register SrcReg = MI.getOperand(1).getReg();
249 replaceRegWith(MRI, DstReg, SrcReg);
250 MI.eraseFromParent();
251}
252
254 MachineInstr &MI, BuildFnTy &MatchInfo) const {
255 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
256 Register DstOp = MI.getOperand(0).getReg();
257 Register OrigOp = MI.getOperand(1).getReg();
258
259 if (!MRI.hasOneNonDBGUse(OrigOp))
260 return false;
261
262 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
263 // Even if only a single operand of the PHI is not guaranteed non-poison,
264 // moving freeze() backwards across a PHI can cause optimization issues for
265 // other users of that operand.
266 //
267 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
268 // the source register is unprofitable because it makes the freeze() more
269 // strict than is necessary (it would affect the whole register instead of
270 // just the subreg being frozen).
271 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
272 return false;
273
274 if (canCreateUndefOrPoison(OrigOp, MRI,
275 /*ConsiderFlagsAndMetadata=*/false))
276 return false;
277
278 std::optional<MachineOperand> MaybePoisonOperand;
279 for (MachineOperand &Operand : OrigDef->uses()) {
280 if (!Operand.isReg())
281 return false;
282
283 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
284 continue;
285
286 if (!MaybePoisonOperand)
287 MaybePoisonOperand = Operand;
288 else {
289 // We have more than one maybe-poison operand. Moving the freeze is
290 // unsafe.
291 return false;
292 }
293 }
294
295 // Eliminate freeze if all operands are guaranteed non-poison.
296 if (!MaybePoisonOperand) {
297 MatchInfo = [=](MachineIRBuilder &B) {
298 Observer.changingInstr(*OrigDef);
299 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
300 Observer.changedInstr(*OrigDef);
301 B.buildCopy(DstOp, OrigOp);
302 };
303 return true;
304 }
305
306 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
307 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
308
309 MatchInfo = [=](MachineIRBuilder &B) mutable {
310 Observer.changingInstr(*OrigDef);
311 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
312 Observer.changedInstr(*OrigDef);
313 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
314 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
316 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
317 Freeze.getReg(0));
318 replaceRegWith(MRI, DstOp, OrigOp);
319 };
320 return true;
321}
322
325 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
326 "Invalid instruction");
327 bool IsUndef = true;
328 MachineInstr *Undef = nullptr;
329
330 // Walk over all the operands of concat vectors and check if they are
331 // build_vector themselves or undef.
332 // Then collect their operands in Ops.
333 for (const MachineOperand &MO : MI.uses()) {
334 Register Reg = MO.getReg();
335 MachineInstr *Def = MRI.getVRegDef(Reg);
336 assert(Def && "Operand not defined");
337 if (!MRI.hasOneNonDBGUse(Reg))
338 return false;
339 switch (Def->getOpcode()) {
340 case TargetOpcode::G_BUILD_VECTOR:
341 IsUndef = false;
342 // Remember the operands of the build_vector to fold
343 // them into the yet-to-build flattened concat vectors.
344 for (const MachineOperand &BuildVecMO : Def->uses())
345 Ops.push_back(BuildVecMO.getReg());
346 break;
347 case TargetOpcode::G_IMPLICIT_DEF: {
348 LLT OpType = MRI.getType(Reg);
349 // Keep one undef value for all the undef operands.
350 if (!Undef) {
351 Builder.setInsertPt(*MI.getParent(), MI);
352 Undef = Builder.buildUndef(OpType.getScalarType());
353 }
354 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
355 OpType.getScalarType() &&
356 "All undefs should have the same type");
357 // Break the undef vector in as many scalar elements as needed
358 // for the flattening.
359 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
360 EltIdx != EltEnd; ++EltIdx)
361 Ops.push_back(Undef->getOperand(0).getReg());
362 break;
363 }
364 default:
365 return false;
366 }
367 }
368
369 // Check if the combine is illegal
370 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
372 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
373 return false;
374 }
375
376 if (IsUndef)
377 Ops.clear();
378
379 return true;
380}
383 // We determined that the concat_vectors can be flattened.
384 // Generate the flattened build_vector.
385 Register DstReg = MI.getOperand(0).getReg();
386 Builder.setInsertPt(*MI.getParent(), MI);
387 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
388
389 // Note: IsUndef is sort of redundant. We could have determined it by
390 // checking that all Ops are undef. Alternatively, we could have
391 // generated a build_vector of undefs and relied on another combine to
392 // clean that up. For now, given we already gather this information
393 // in matchCombineConcatVectors, just save compile time and issue the
394 // right thing.
395 if (Ops.empty())
396 Builder.buildUndef(NewDstReg);
397 else
398 Builder.buildBuildVector(NewDstReg, Ops);
399 replaceRegWith(MRI, DstReg, NewDstReg);
400 MI.eraseFromParent();
401}
402
405 auto &BV = cast<GBuildVector>(MI);
406
407 // Look at the first operand for a unmerge(bitcast) from a scalar type.
408 GUnmerge *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
409 if (!Unmerge || Unmerge->getReg(0) != BV.getSourceReg(0))
410 return false;
411 MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
412 if (BC->getOpcode() != TargetOpcode::G_BITCAST)
413 return false;
414 LLT InputTy = MRI.getType(BC->getOperand(1).getReg());
415 unsigned Factor = Unmerge->getNumDefs();
416 if (!InputTy.isScalar() || BV.getNumSources() % Factor != 0)
417 return false;
418
419 // Check if the build_vector is legal
420 LLT BVDstTy = LLT::fixed_vector(BV.getNumSources() / Factor, InputTy);
421 if (!isLegal({TargetOpcode::G_BUILD_VECTOR, {BVDstTy, InputTy}}))
422 return false;
423
424 // Check all other operands are bitcasts or undef.
425 for (unsigned Idx = 0; Idx < BV.getNumSources(); Idx += Factor) {
426 GUnmerge *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(Idx), MRI);
427 if (!all_of(iota_range<unsigned>(0, Factor, false), [&](unsigned J) {
428 MachineInstr *Src = MRI.getVRegDef(BV.getSourceReg(Idx + J));
429 if (Src->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
430 return true;
431 return Unmerge && BV.getSourceReg(Idx + J) == Unmerge->getReg(J);
432 }))
433 return false;
434 if (!Unmerge)
435 Ops.push_back(0);
436 else {
437 MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
438 if (BC->getOpcode() != TargetOpcode::G_BITCAST ||
439 MRI.getType(BC->getOperand(1).getReg()) != InputTy)
440 return false;
441 Ops.push_back(BC->getOperand(1).getReg());
442 }
443 }
444
445 return true;
446}
447
450 LLT SrcTy = MRI.getType(Ops[0]);
451 // Build undef if any operations require it.
452 Register Undef = 0;
453 for (Register &Op : Ops) {
454 if (!Op) {
455 if (!Undef)
456 Undef = Builder.buildUndef(SrcTy).getReg(0);
457 Op = Undef;
458 }
459 }
460
461 LLT BVDstTy = LLT::fixed_vector(Ops.size(), SrcTy);
462 auto BV = Builder.buildBuildVector(BVDstTy, Ops);
463 Builder.buildBitcast(MI.getOperand(0).getReg(), BV);
464 MI.eraseFromParent();
465}
466
468 auto &Shuffle = cast<GShuffleVector>(MI);
469
470 Register SrcVec1 = Shuffle.getSrc1Reg();
471 Register SrcVec2 = Shuffle.getSrc2Reg();
472 LLT EltTy = MRI.getType(SrcVec1).getElementType();
473 int Width = MRI.getType(SrcVec1).getNumElements();
474
475 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
476 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
477
478 SmallVector<Register> Extracts;
479 // Select only applicable elements from unmerged values.
480 for (int Val : Shuffle.getMask()) {
481 if (Val == -1)
482 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
483 else if (Val < Width)
484 Extracts.push_back(Unmerge1.getReg(Val));
485 else
486 Extracts.push_back(Unmerge2.getReg(Val - Width));
487 }
488 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
489 if (Extracts.size() == 1)
490 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
491 else
492 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
493 MI.eraseFromParent();
494}
495
498 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
499 auto ConcatMI1 =
500 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
501 auto ConcatMI2 =
502 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
503 if (!ConcatMI1 || !ConcatMI2)
504 return false;
505
506 // Check that the sources of the Concat instructions have the same type
507 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
508 MRI.getType(ConcatMI2->getSourceReg(0)))
509 return false;
510
511 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
512 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
513 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
514 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
515 // Check if the index takes a whole source register from G_CONCAT_VECTORS
516 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
517 if (Mask[i] == -1) {
518 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
519 if (i + j >= Mask.size())
520 return false;
521 if (Mask[i + j] != -1)
522 return false;
523 }
525 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
526 return false;
527 Ops.push_back(0);
528 } else if (Mask[i] % ConcatSrcNumElt == 0) {
529 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
530 if (i + j >= Mask.size())
531 return false;
532 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
533 return false;
534 }
535 // Retrieve the source register from its respective G_CONCAT_VECTORS
536 // instruction
537 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
538 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
539 } else {
540 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
541 ConcatMI1->getNumSources()));
542 }
543 } else {
544 return false;
545 }
546 }
547
549 {TargetOpcode::G_CONCAT_VECTORS,
550 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
551 return false;
552
553 return !Ops.empty();
554}
555
558 LLT SrcTy;
559 for (Register &Reg : Ops) {
560 if (Reg != 0)
561 SrcTy = MRI.getType(Reg);
562 }
563 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
564
565 Register UndefReg = 0;
566
567 for (Register &Reg : Ops) {
568 if (Reg == 0) {
569 if (UndefReg == 0)
570 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
571 Reg = UndefReg;
572 }
573 }
574
575 if (Ops.size() > 1)
576 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
577 else
578 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
579 MI.eraseFromParent();
580}
581
584 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
585 "Invalid instruction kind");
586 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
587 Register Src1 = MI.getOperand(1).getReg();
588 LLT SrcType = MRI.getType(Src1);
589
590 unsigned DstNumElts = DstType.getNumElements();
591 unsigned SrcNumElts = SrcType.getNumElements();
592
593 // If the resulting vector is smaller than the size of the source
594 // vectors being concatenated, we won't be able to replace the
595 // shuffle vector into a concat_vectors.
596 //
597 // Note: We may still be able to produce a concat_vectors fed by
598 // extract_vector_elt and so on. It is less clear that would
599 // be better though, so don't bother for now.
600 //
601 // If the destination is a scalar, the size of the sources doesn't
602 // matter. we will lower the shuffle to a plain copy. This will
603 // work only if the source and destination have the same size. But
604 // that's covered by the next condition.
605 //
606 // TODO: If the size between the source and destination don't match
607 // we could still emit an extract vector element in that case.
608 if (DstNumElts < 2 * SrcNumElts)
609 return false;
610
611 // Check that the shuffle mask can be broken evenly between the
612 // different sources.
613 if (DstNumElts % SrcNumElts != 0)
614 return false;
615
616 // Mask length is a multiple of the source vector length.
617 // Check if the shuffle is some kind of concatenation of the input
618 // vectors.
619 unsigned NumConcat = DstNumElts / SrcNumElts;
620 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
621 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
622 for (unsigned i = 0; i != DstNumElts; ++i) {
623 int Idx = Mask[i];
624 // Undef value.
625 if (Idx < 0)
626 continue;
627 // Ensure the indices in each SrcType sized piece are sequential and that
628 // the same source is used for the whole piece.
629 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
630 (ConcatSrcs[i / SrcNumElts] >= 0 &&
631 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
632 return false;
633 // Remember which source this index came from.
634 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
635 }
636
637 // The shuffle is concatenating multiple vectors together.
638 // Collect the different operands for that.
639 Register UndefReg;
640 Register Src2 = MI.getOperand(2).getReg();
641 for (auto Src : ConcatSrcs) {
642 if (Src < 0) {
643 if (!UndefReg) {
644 Builder.setInsertPt(*MI.getParent(), MI);
645 UndefReg = Builder.buildUndef(SrcType).getReg(0);
646 }
647 Ops.push_back(UndefReg);
648 } else if (Src == 0)
649 Ops.push_back(Src1);
650 else
651 Ops.push_back(Src2);
652 }
653 return true;
654}
655
657 ArrayRef<Register> Ops) const {
658 Register DstReg = MI.getOperand(0).getReg();
659 Builder.setInsertPt(*MI.getParent(), MI);
660 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
661
662 if (Ops.size() == 1)
663 Builder.buildCopy(NewDstReg, Ops[0]);
664 else
665 Builder.buildMergeLikeInstr(NewDstReg, Ops);
666
667 replaceRegWith(MRI, DstReg, NewDstReg);
668 MI.eraseFromParent();
669}
670
671namespace {
672
673/// Select a preference between two uses. CurrentUse is the current preference
674/// while *ForCandidate is attributes of the candidate under consideration.
675PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
676 PreferredTuple &CurrentUse,
677 const LLT TyForCandidate,
678 unsigned OpcodeForCandidate,
679 MachineInstr *MIForCandidate) {
680 if (!CurrentUse.Ty.isValid()) {
681 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
682 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
683 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
684 return CurrentUse;
685 }
686
687 // We permit the extend to hoist through basic blocks but this is only
688 // sensible if the target has extending loads. If you end up lowering back
689 // into a load and extend during the legalizer then the end result is
690 // hoisting the extend up to the load.
691
692 // Prefer defined extensions to undefined extensions as these are more
693 // likely to reduce the number of instructions.
694 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
695 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
696 return CurrentUse;
697 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
698 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
699 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
700
701 // Prefer sign extensions to zero extensions as sign-extensions tend to be
702 // more expensive. Don't do this if the load is already a zero-extend load
703 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
704 // later.
705 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
706 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
707 OpcodeForCandidate == TargetOpcode::G_ZEXT)
708 return CurrentUse;
709 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
710 OpcodeForCandidate == TargetOpcode::G_SEXT)
711 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
712 }
713
714 // This is potentially target specific. We've chosen the largest type
715 // because G_TRUNC is usually free. One potential catch with this is that
716 // some targets have a reduced number of larger registers than smaller
717 // registers and this choice potentially increases the live-range for the
718 // larger value.
719 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
720 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
721 }
722 return CurrentUse;
723}
724
725/// Find a suitable place to insert some instructions and insert them. This
726/// function accounts for special cases like inserting before a PHI node.
727/// The current strategy for inserting before PHI's is to duplicate the
728/// instructions for each predecessor. However, while that's ok for G_TRUNC
729/// on most targets since it generally requires no code, other targets/cases may
730/// want to try harder to find a dominating block.
///
/// The \p Inserter callback is invoked exactly once, with the chosen block,
/// the insertion iterator within that block, and \p UseMO.
// NOTE(review): part of the parameter list is not visible in this listing
// (the lines between the opening parenthesis and `MachineOperand &UseMO)>`);
// confirm the full signature against the original file.
731static void InsertInsnsWithoutSideEffectsBeforeUse(
734 MachineOperand &UseMO)>
735 Inserter) {
736 MachineInstr &UseMI = *UseMO.getParent();
737
// Default to the block containing the using instruction.
738 MachineBasicBlock *InsertBB = UseMI.getParent();
739
740 // If the use is a PHI then we want the predecessor block instead.
741 if (UseMI.isPHI()) {
// The operand immediately following UseMO is read as a basic-block operand
// and becomes the insertion block.
742 MachineOperand *PredBB = std::next(&UseMO);
743 InsertBB = PredBB->getMBB();
744 }
745
746 // If the block is the same block as the def then we want to insert just after
747 // the def instead of at the start of the block.
748 if (InsertBB == DefMI.getParent()) {
// NOTE(review): InsertPt is not declared on any line visible here; its
// definition presumably sits on a line missing from this listing - confirm.
750 Inserter(InsertBB, std::next(InsertPt), UseMO);
751 return;
752 }
753
754 // Otherwise we want the start of the BB
755 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
756}
757} // end anonymous namespace
758
760 PreferredTuple Preferred;
761 if (matchCombineExtendingLoads(MI, Preferred)) {
762 applyCombineExtendingLoads(MI, Preferred);
763 return true;
764 }
765 return false;
766}
767
768static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
769 unsigned CandidateLoadOpc;
770 switch (ExtOpc) {
771 case TargetOpcode::G_ANYEXT:
772 CandidateLoadOpc = TargetOpcode::G_LOAD;
773 break;
774 case TargetOpcode::G_SEXT:
775 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
776 break;
777 case TargetOpcode::G_ZEXT:
778 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
779 break;
780 default:
781 llvm_unreachable("Unexpected extend opc");
782 }
783 return CandidateLoadOpc;
784}
785
787 MachineInstr &MI, PreferredTuple &Preferred) const {
788 // We match the loads and follow the uses to the extend instead of matching
789 // the extends and following the def to the load. This is because the load
790 // must remain in the same position for correctness (unless we also add code
791 // to find a safe place to sink it) whereas the extend is freely movable.
792 // It also prevents us from duplicating the load for the volatile case or just
793 // for performance.
794 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
795 if (!LoadMI)
796 return false;
797
798 Register LoadReg = LoadMI->getDstReg();
799
800 LLT LoadValueTy = MRI.getType(LoadReg);
801 if (!LoadValueTy.isScalar())
802 return false;
803
804 // Most architectures are going to legalize <s8 loads into at least a 1 byte
805 // load, and the MMOs can only describe memory accesses in multiples of bytes.
806 // If we try to perform extload combining on those, we can end up with
807 // %a(s8) = extload %ptr (load 1 byte from %ptr)
808 // ... which is an illegal extload instruction.
809 if (LoadValueTy.getSizeInBits() < 8)
810 return false;
811
812 // For non power-of-2 types, they will very likely be legalized into multiple
813 // loads. Don't bother trying to match them into extending loads.
815 return false;
816
817 // Find the preferred type aside from the any-extends (unless it's the only
818 // one) and non-extending ops. We'll emit an extending load to that type and
819 // emit a variant of (extend (trunc X)) for the others according to the
820 // relative type sizes. At the same time, pick an extend to use based on the
821 // extend involved in the chosen type.
822 unsigned PreferredOpcode =
823 isa<GLoad>(&MI)
824 ? TargetOpcode::G_ANYEXT
825 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
826 Preferred = {LLT(), PreferredOpcode, nullptr};
827 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
828 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
829 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
830 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
831 const auto &MMO = LoadMI->getMMO();
832 // Don't do anything for atomics.
833 if (MMO.isAtomic())
834 continue;
835 // Check for legality.
836 if (!isPreLegalize()) {
837 LegalityQuery::MemDesc MMDesc(MMO);
838 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
839 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
840 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
841 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
842 .Action != LegalizeActions::Legal)
843 continue;
844 }
845 Preferred = ChoosePreferredUse(MI, Preferred,
846 MRI.getType(UseMI.getOperand(0).getReg()),
847 UseMI.getOpcode(), &UseMI);
848 }
849 }
850
851 // There were no extends
852 if (!Preferred.MI)
853 return false;
854 // It should be impossible to choose an extend without selecting a different
855 // type since by definition the result of an extend is larger.
856 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
857
858 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
859 return true;
860}
861
863 MachineInstr &MI, PreferredTuple &Preferred) const {
864 // Rewrite the load to the chosen extending load.
865 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
866
867 // Inserter to insert a truncate back to the original type at a given point
868 // with some basic CSE to limit truncate duplication to one per BB.
870 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
871 MachineBasicBlock::iterator InsertBefore,
872 MachineOperand &UseMO) {
873 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
874 if (PreviouslyEmitted) {
875 Observer.changingInstr(*UseMO.getParent());
876 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
877 Observer.changedInstr(*UseMO.getParent());
878 return;
879 }
880
881 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
882 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
883 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
884 EmittedInsns[InsertIntoBB] = NewMI;
885 replaceRegOpWith(MRI, UseMO, NewDstReg);
886 };
887
888 Observer.changingInstr(MI);
889 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
890 MI.setDesc(Builder.getTII().get(LoadOpc));
891
892 // Rewrite all the uses to fix up the types.
893 auto &LoadValue = MI.getOperand(0);
895 llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));
896
897 for (auto *UseMO : Uses) {
898 MachineInstr *UseMI = UseMO->getParent();
899
900 // If the extend is compatible with the preferred extend then we should fix
901 // up the type and extend so that it uses the preferred use.
902 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
903 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
904 Register UseDstReg = UseMI->getOperand(0).getReg();
905 MachineOperand &UseSrcMO = UseMI->getOperand(1);
906 const LLT UseDstTy = MRI.getType(UseDstReg);
907 if (UseDstReg != ChosenDstReg) {
908 if (Preferred.Ty == UseDstTy) {
909 // If the use has the same type as the preferred use, then merge
910 // the vregs and erase the extend. For example:
911 // %1:_(s8) = G_LOAD ...
912 // %2:_(s32) = G_SEXT %1(s8)
913 // %3:_(s32) = G_ANYEXT %1(s8)
914 // ... = ... %3(s32)
915 // rewrites to:
916 // %2:_(s32) = G_SEXTLOAD ...
917 // ... = ... %2(s32)
918 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
919 Observer.erasingInstr(*UseMO->getParent());
920 UseMO->getParent()->eraseFromParent();
921 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
922 // If the preferred size is smaller, then keep the extend but extend
923 // from the result of the extending load. For example:
924 // %1:_(s8) = G_LOAD ...
925 // %2:_(s32) = G_SEXT %1(s8)
926 // %3:_(s64) = G_ANYEXT %1(s8)
927 // ... = ... %3(s64)
928 // rewrites to:
929 // %2:_(s32) = G_SEXTLOAD ...
930 // %3:_(s64) = G_ANYEXT %2:_(s32)
931 // ... = ... %3(s64)
932 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
933 } else {
934 // If the preferred size is large, then insert a truncate. For
935 // example:
936 // %1:_(s8) = G_LOAD ...
937 // %2:_(s64) = G_SEXT %1(s8)
938 // %3:_(s32) = G_ZEXT %1(s8)
939 // ... = ... %3(s32)
940 // rewrites to:
941 // %2:_(s64) = G_SEXTLOAD ...
942 // %4:_(s8) = G_TRUNC %2:_(s64)
943 // %3:_(s32) = G_ZEXT %4:_(s8)
944 // ... = ... %3(s32)
945 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
946 InsertTruncAt);
947 }
948 continue;
949 }
950 // The use is (one of) the uses of the preferred use we chose earlier.
951 // We're going to update the load to def this value later so just erase
952 // the old extend.
953 Observer.erasingInstr(*UseMO->getParent());
954 UseMO->getParent()->eraseFromParent();
955 continue;
956 }
957
958 // The use isn't an extend. Truncate back to the type we originally loaded.
959 // This is free on many targets.
960 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
961 }
962
963 MI.getOperand(0).setReg(ChosenDstReg);
964 Observer.changedInstr(MI);
965}
966
968 BuildFnTy &MatchInfo) const {
969 assert(MI.getOpcode() == TargetOpcode::G_AND);
970
971 // If we have the following code:
972 // %mask = G_CONSTANT 255
973 // %ld = G_LOAD %ptr, (load s16)
974 // %and = G_AND %ld, %mask
975 //
976 // Try to fold it into
977 // %ld = G_ZEXTLOAD %ptr, (load s8)
978
979 Register Dst = MI.getOperand(0).getReg();
980 if (MRI.getType(Dst).isVector())
981 return false;
982
983 auto MaybeMask =
984 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
985 if (!MaybeMask)
986 return false;
987
988 APInt MaskVal = MaybeMask->Value;
989
990 if (!MaskVal.isMask())
991 return false;
992
993 Register SrcReg = MI.getOperand(1).getReg();
994 // Don't use getOpcodeDef() here since intermediate instructions may have
995 // multiple users.
996 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
997 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
998 return false;
999
1000 Register LoadReg = LoadMI->getDstReg();
1001 LLT RegTy = MRI.getType(LoadReg);
1002 Register PtrReg = LoadMI->getPointerReg();
1003 unsigned RegSize = RegTy.getSizeInBits();
1004 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
1005 unsigned MaskSizeBits = MaskVal.countr_one();
1006
1007 // The mask may not be larger than the in-memory type, as it might cover sign
1008 // extended bits
1009 if (MaskSizeBits > LoadSizeBits.getValue())
1010 return false;
1011
1012 // If the mask covers the whole destination register, there's nothing to
1013 // extend
1014 if (MaskSizeBits >= RegSize)
1015 return false;
1016
1017 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
1018 // at least byte loads. Avoid creating such loads here
1019 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
1020 return false;
1021
1022 const MachineMemOperand &MMO = LoadMI->getMMO();
1023 LegalityQuery::MemDesc MemDesc(MMO);
1024
1025 // Don't modify the memory access size if this is atomic/volatile, but we can
1026 // still adjust the opcode to indicate the high bit behavior.
1027 if (LoadMI->isSimple())
1028 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
1029 else if (LoadSizeBits.getValue() > MaskSizeBits ||
1030 LoadSizeBits.getValue() == RegSize)
1031 return false;
1032
1033 // TODO: Could check if it's legal with the reduced or original memory size.
1035 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
1036 return false;
1037
1038 MatchInfo = [=](MachineIRBuilder &B) {
1039 B.setInstrAndDebugLoc(*LoadMI);
1040 auto &MF = B.getMF();
1041 auto PtrInfo = MMO.getPointerInfo();
1042 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
1043 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
1044 LoadMI->eraseFromParent();
1045 };
1046 return true;
1047}
1048
1050 const MachineInstr &UseMI) const {
1051 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1052 "shouldn't consider debug uses");
1053 assert(DefMI.getParent() == UseMI.getParent());
1054 if (&DefMI == &UseMI)
1055 return true;
1056 const MachineBasicBlock &MBB = *DefMI.getParent();
1057 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
1058 return &MI == &DefMI || &MI == &UseMI;
1059 });
1060 if (DefOrUse == MBB.end())
1061 llvm_unreachable("Block must contain both DefMI and UseMI!");
1062 return &*DefOrUse == &DefMI;
1063}
1064
1066 const MachineInstr &UseMI) const {
1067 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1068 "shouldn't consider debug uses");
1069 if (MDT)
1070 return MDT->dominates(&DefMI, &UseMI);
1071 else if (DefMI.getParent() != UseMI.getParent())
1072 return false;
1073
1074 return isPredecessor(DefMI, UseMI);
1075}
1076
1078 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1079 Register SrcReg = MI.getOperand(1).getReg();
1080 Register LoadUser = SrcReg;
1081
1082 if (MRI.getType(SrcReg).isVector())
1083 return false;
1084
1085 Register TruncSrc;
1086 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1087 LoadUser = TruncSrc;
1088
1089 uint64_t SizeInBits = MI.getOperand(2).getImm();
1090 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1091 // need any extend at all, just a truncate.
1092 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1093 // If truncating more than the original extended value, abort.
1094 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1095 if (TruncSrc &&
1096 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1097 return false;
1098 if (LoadSizeBits == SizeInBits)
1099 return true;
1100 }
1101 return false;
1102}
1103
1105 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1106 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1107 MI.eraseFromParent();
1108}
1109
1111 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1112 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1113
1114 Register DstReg = MI.getOperand(0).getReg();
1115 LLT RegTy = MRI.getType(DstReg);
1116
1117 // Only supports scalars for now.
1118 if (RegTy.isVector())
1119 return false;
1120
1121 Register SrcReg = MI.getOperand(1).getReg();
1122 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1123 if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
1124 return false;
1125
1126 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1127
1128 // If the sign extend extends from a narrower width than the load's width,
1129 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1130 // Avoid widening the load at all.
1131 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1132
1133 // Don't generate G_SEXTLOADs with a < 1 byte width.
1134 if (NewSizeBits < 8)
1135 return false;
1136 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1137 // anyway for most targets.
1138 if (!isPowerOf2_32(NewSizeBits))
1139 return false;
1140
1141 const MachineMemOperand &MMO = LoadDef->getMMO();
1142 LegalityQuery::MemDesc MMDesc(MMO);
1143
1144 // Don't modify the memory access size if this is atomic/volatile, but we can
1145 // still adjust the opcode to indicate the high bit behavior.
1146 if (LoadDef->isSimple())
1147 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1148 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1149 return false;
1150
1151 // TODO: Could check if it's legal with the reduced or original memory size.
1152 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1153 {MRI.getType(LoadDef->getDstReg()),
1154 MRI.getType(LoadDef->getPointerReg())},
1155 {MMDesc}}))
1156 return false;
1157
1158 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1159 return true;
1160}
1161
1163 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1164 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1165 Register LoadReg;
1166 unsigned ScalarSizeBits;
1167 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1168 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1169
1170 // If we have the following:
1171 // %ld = G_LOAD %ptr, (load 2)
1172 // %ext = G_SEXT_INREG %ld, 8
1173 // ==>
1174 // %ld = G_SEXTLOAD %ptr (load 1)
1175
1176 auto &MMO = LoadDef->getMMO();
1177 Builder.setInstrAndDebugLoc(*LoadDef);
1178 auto &MF = Builder.getMF();
1179 auto PtrInfo = MMO.getPointerInfo();
1180 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1181 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1182 LoadDef->getPointerReg(), *NewMMO);
1183 MI.eraseFromParent();
1184
1185 // Not all loads can be deleted, so make sure the old one is removed.
1186 LoadDef->eraseFromParent();
1187}
1188
1189/// Return true if 'MI' is a load or a store that may fold its address
1190/// operand into the load / store addressing mode.
1192 MachineRegisterInfo &MRI) {
1194 auto *MF = MI->getMF();
1195 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1196 if (!Addr)
1197 return false;
1198
1199 AM.HasBaseReg = true;
1200 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1201 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1202 else
1203 AM.Scale = 1; // [reg +/- reg]
1204
1205 return TLI.isLegalAddressingMode(
1206 MF->getDataLayout(), AM,
1207 getTypeForLLT(MI->getMMO().getMemoryType(),
1208 MF->getFunction().getContext()),
1209 MI->getMMO().getAddrSpace());
1210}
1211
1212static unsigned getIndexedOpc(unsigned LdStOpc) {
1213 switch (LdStOpc) {
1214 case TargetOpcode::G_LOAD:
1215 return TargetOpcode::G_INDEXED_LOAD;
1216 case TargetOpcode::G_STORE:
1217 return TargetOpcode::G_INDEXED_STORE;
1218 case TargetOpcode::G_ZEXTLOAD:
1219 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1220 case TargetOpcode::G_SEXTLOAD:
1221 return TargetOpcode::G_INDEXED_SEXTLOAD;
1222 default:
1223 llvm_unreachable("Unexpected opcode");
1224 }
1225}
1226
1227bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1228 // Check for legality.
1229 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1230 LLT Ty = MRI.getType(LdSt.getReg(0));
1231 LLT MemTy = LdSt.getMMO().getMemoryType();
1233 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1235 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1236 SmallVector<LLT> OpTys;
1237 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1238 OpTys = {PtrTy, Ty, Ty};
1239 else
1240 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1241
1242 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1243 return isLegal(Q);
1244}
1245
1247 "post-index-use-threshold", cl::Hidden, cl::init(32),
1248 cl::desc("Number of uses of a base pointer to check before it is no longer "
1249 "considered for post-indexing."));
1250
1251bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1253 bool &RematOffset) const {
1254 // We're looking for the following pattern, for either load or store:
1255 // %baseptr:_(p0) = ...
1256 // G_STORE %val(s64), %baseptr(p0)
1257 // %offset:_(s64) = G_CONSTANT i64 -256
1258 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1259 const auto &TLI = getTargetLowering();
1260
1261 Register Ptr = LdSt.getPointerReg();
1262 // If the store is the only use, don't bother.
1263 if (MRI.hasOneNonDBGUse(Ptr))
1264 return false;
1265
1266 if (!isIndexedLoadStoreLegal(LdSt))
1267 return false;
1268
1269 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1270 return false;
1271
1272 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1273 auto *PtrDef = MRI.getVRegDef(Ptr);
1274
1275 unsigned NumUsesChecked = 0;
1276 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1277 if (++NumUsesChecked > PostIndexUseThreshold)
1278 return false; // Try to avoid exploding compile time.
1279
1280 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1281 // The use itself might be dead. This can happen during combines if DCE
1282 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1283 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1284 continue;
1285
1286 // Check the user of this isn't the store, otherwise we'd generate an
1287 // indexed store defining its own use.
1288 if (StoredValDef == &Use)
1289 continue;
1290
1291 Offset = PtrAdd->getOffsetReg();
1292 if (!ForceLegalIndexing &&
1293 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1294 /*IsPre*/ false, MRI))
1295 continue;
1296
1297 // Make sure the offset calculation is before the potentially indexed op.
1298 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1299 RematOffset = false;
1300 if (!dominates(*OffsetDef, LdSt)) {
1301 // If the offset however is just a G_CONSTANT, we can always just
1302 // rematerialize it where we need it.
1303 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1304 continue;
1305 RematOffset = true;
1306 }
1307
1308 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1309 if (&BasePtrUse == PtrDef)
1310 continue;
1311
1312 // If the user is a later load/store that can be post-indexed, then don't
1313 // combine this one.
1314 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1315 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1316 dominates(LdSt, *BasePtrLdSt) &&
1317 isIndexedLoadStoreLegal(*BasePtrLdSt))
1318 return false;
1319
1320 // Now we're looking for the key G_PTR_ADD instruction, which contains
1321 // the offset add that we want to fold.
1322 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1323 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1324 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1325 // If the use is in a different block, then we may produce worse code
1326 // due to the extra register pressure.
1327 if (BaseUseUse.getParent() != LdSt.getParent())
1328 return false;
1329
1330 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1331 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1332 return false;
1333 }
1334 if (!dominates(LdSt, BasePtrUse))
1335 return false; // All use must be dominated by the load/store.
1336 }
1337 }
1338
1339 Addr = PtrAdd->getReg(0);
1340 Base = PtrAdd->getBaseReg();
1341 return true;
1342 }
1343
1344 return false;
1345}
1346
1347bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1348 Register &Base,
1349 Register &Offset) const {
1350 auto &MF = *LdSt.getParent()->getParent();
1351 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1352
1353 Addr = LdSt.getPointerReg();
1354 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1355 MRI.hasOneNonDBGUse(Addr))
1356 return false;
1357
1358 if (!ForceLegalIndexing &&
1359 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1360 return false;
1361
1362 if (!isIndexedLoadStoreLegal(LdSt))
1363 return false;
1364
1365 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1366 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1367 return false;
1368
1369 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1370 // Would require a copy.
1371 if (Base == St->getValueReg())
1372 return false;
1373
1374 // We're expecting one use of Addr in MI, but it could also be the
1375 // value stored, which isn't actually dominated by the instruction.
1376 if (St->getValueReg() == Addr)
1377 return false;
1378 }
1379
1380 // Avoid increasing cross-block register pressure.
1381 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1382 if (AddrUse.getParent() != LdSt.getParent())
1383 return false;
1384
1385 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1386 // That might allow us to end base's liveness here by adjusting the constant.
1387 bool RealUse = false;
1388 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1389 if (!dominates(LdSt, AddrUse))
1390 return false; // All use must be dominated by the load/store.
1391
1392 // If Ptr may be folded in addressing mode of other use, then it's
1393 // not profitable to do this transformation.
1394 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1395 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1396 RealUse = true;
1397 } else {
1398 RealUse = true;
1399 }
1400 }
1401 return RealUse;
1402}
1403
1405 MachineInstr &MI, BuildFnTy &MatchInfo) const {
1406 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1407
1408 // Check if there is a load that defines the vector being extracted from.
1409 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1410 if (!LoadMI)
1411 return false;
1412
1413 Register Vector = MI.getOperand(1).getReg();
1414 LLT VecEltTy = MRI.getType(Vector).getElementType();
1415
1416 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1417
1418 // Checking whether we should reduce the load width.
1419 if (!MRI.hasOneNonDBGUse(Vector))
1420 return false;
1421
1422 // Check if the defining load is simple.
1423 if (!LoadMI->isSimple())
1424 return false;
1425
1426 // If the vector element type is not a multiple of a byte then we are unable
1427 // to correctly compute an address to load only the extracted element as a
1428 // scalar.
1429 if (!VecEltTy.isByteSized())
1430 return false;
1431
1432 // Check for load fold barriers between the extraction and the load.
1433 if (MI.getParent() != LoadMI->getParent())
1434 return false;
1435 const unsigned MaxIter = 20;
1436 unsigned Iter = 0;
1437 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1438 if (II->isLoadFoldBarrier())
1439 return false;
1440 if (Iter++ == MaxIter)
1441 return false;
1442 }
1443
1444 // Check if the new load that we are going to create is legal
1445 // if we are in the post-legalization phase.
1446 MachineMemOperand MMO = LoadMI->getMMO();
1447 Align Alignment = MMO.getAlign();
1448 MachinePointerInfo PtrInfo;
1450
1451 // Finding the appropriate PtrInfo if offset is a known constant.
1452 // This is required to create the memory operand for the narrowed load.
1453 // This machine memory operand object helps us infer about legality
1454 // before we proceed to combine the instruction.
1455 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1456 int Elt = CVal->getZExtValue();
1457 // FIXME: should be (ABI size)*Elt.
1458 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1459 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1460 } else {
1461 // Discard the pointer info except the address space because the memory
1462 // operand can't represent this new access since the offset is variable.
1463 Offset = VecEltTy.getSizeInBits() / 8;
1465 }
1466
1467 Alignment = commonAlignment(Alignment, Offset);
1468
1469 Register VecPtr = LoadMI->getPointerReg();
1470 LLT PtrTy = MRI.getType(VecPtr);
1471
1472 MachineFunction &MF = *MI.getMF();
1473 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1474
1475 LegalityQuery::MemDesc MMDesc(*NewMMO);
1476
1478 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1479 return false;
1480
1481 // Load must be allowed and fast on the target.
1483 auto &DL = MF.getDataLayout();
1484 unsigned Fast = 0;
1485 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1486 &Fast) ||
1487 !Fast)
1488 return false;
1489
1490 Register Result = MI.getOperand(0).getReg();
1491 Register Index = MI.getOperand(2).getReg();
1492
1493 MatchInfo = [=](MachineIRBuilder &B) {
1494 GISelObserverWrapper DummyObserver;
1495 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1496 //// Get pointer to the vector element.
1497 Register finalPtr = Helper.getVectorElementPointer(
1498 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1499 Index);
1500 // New G_LOAD instruction.
1501 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1502 // Remove original GLOAD instruction.
1503 LoadMI->eraseFromParent();
1504 };
1505
1506 return true;
1507}
1508
1510 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1511 auto &LdSt = cast<GLoadStore>(MI);
1512
1513 if (LdSt.isAtomic())
1514 return false;
1515
1516 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1517 MatchInfo.Offset);
1518 if (!MatchInfo.IsPre &&
1519 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1520 MatchInfo.Offset, MatchInfo.RematOffset))
1521 return false;
1522
1523 return true;
1524}
1525
1527 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1528 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1529 unsigned Opcode = MI.getOpcode();
1530 bool IsStore = Opcode == TargetOpcode::G_STORE;
1531 unsigned NewOpcode = getIndexedOpc(Opcode);
1532
1533 // If the offset constant didn't happen to dominate the load/store, we can
1534 // just clone it as needed.
1535 if (MatchInfo.RematOffset) {
1536 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1537 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1538 *OldCst->getOperand(1).getCImm());
1539 MatchInfo.Offset = NewCst.getReg(0);
1540 }
1541
1542 auto MIB = Builder.buildInstr(NewOpcode);
1543 if (IsStore) {
1544 MIB.addDef(MatchInfo.Addr);
1545 MIB.addUse(MI.getOperand(0).getReg());
1546 } else {
1547 MIB.addDef(MI.getOperand(0).getReg());
1548 MIB.addDef(MatchInfo.Addr);
1549 }
1550
1551 MIB.addUse(MatchInfo.Base);
1552 MIB.addUse(MatchInfo.Offset);
1553 MIB.addImm(MatchInfo.IsPre);
1554 MIB->cloneMemRefs(*MI.getMF(), MI);
1555 MI.eraseFromParent();
1556 AddrDef.eraseFromParent();
1557
1558 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
1559}
1560
1562 MachineInstr *&OtherMI) const {
1563 unsigned Opcode = MI.getOpcode();
1564 bool IsDiv, IsSigned;
1565
1566 switch (Opcode) {
1567 default:
1568 llvm_unreachable("Unexpected opcode!");
1569 case TargetOpcode::G_SDIV:
1570 case TargetOpcode::G_UDIV: {
1571 IsDiv = true;
1572 IsSigned = Opcode == TargetOpcode::G_SDIV;
1573 break;
1574 }
1575 case TargetOpcode::G_SREM:
1576 case TargetOpcode::G_UREM: {
1577 IsDiv = false;
1578 IsSigned = Opcode == TargetOpcode::G_SREM;
1579 break;
1580 }
1581 }
1582
1583 Register Src1 = MI.getOperand(1).getReg();
1584 unsigned DivOpcode, RemOpcode, DivremOpcode;
1585 if (IsSigned) {
1586 DivOpcode = TargetOpcode::G_SDIV;
1587 RemOpcode = TargetOpcode::G_SREM;
1588 DivremOpcode = TargetOpcode::G_SDIVREM;
1589 } else {
1590 DivOpcode = TargetOpcode::G_UDIV;
1591 RemOpcode = TargetOpcode::G_UREM;
1592 DivremOpcode = TargetOpcode::G_UDIVREM;
1593 }
1594
1595 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1596 return false;
1597
1598 // Combine:
1599 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1600 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1601 // into:
1602 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1603
1604 // Combine:
1605 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1606 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1607 // into:
1608 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1609
1610 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1611 if (MI.getParent() == UseMI.getParent() &&
1612 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1613 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1614 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1615 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1616 OtherMI = &UseMI;
1617 return true;
1618 }
1619 }
1620
1621 return false;
1622}
1623
1625 MachineInstr *&OtherMI) const {
1626 unsigned Opcode = MI.getOpcode();
1627 assert(OtherMI && "OtherMI shouldn't be empty.");
1628
1629 Register DestDivReg, DestRemReg;
1630 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1631 DestDivReg = MI.getOperand(0).getReg();
1632 DestRemReg = OtherMI->getOperand(0).getReg();
1633 } else {
1634 DestDivReg = OtherMI->getOperand(0).getReg();
1635 DestRemReg = MI.getOperand(0).getReg();
1636 }
1637
1638 bool IsSigned =
1639 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1640
1641 // Check which instruction is first in the block so we don't break def-use
1642 // deps by "moving" the instruction incorrectly. Also keep track of which
1643 // instruction is first so we pick its operands, avoiding use-before-def
1644 // bugs.
1645 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1646 Builder.setInstrAndDebugLoc(*FirstInst);
1647
1648 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1649 : TargetOpcode::G_UDIVREM,
1650 {DestDivReg, DestRemReg},
1651 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1652 MI.eraseFromParent();
1653 OtherMI->eraseFromParent();
1654}
1655
1657 MachineInstr &MI, MachineInstr *&BrCond) const {
1658 assert(MI.getOpcode() == TargetOpcode::G_BR);
1659
1660 // Try to match the following:
1661 // bb1:
1662 // G_BRCOND %c1, %bb2
1663 // G_BR %bb3
1664 // bb2:
1665 // ...
1666 // bb3:
1667
1668 // The above pattern does not have a fall through to the successor bb2, always
1669 // resulting in a branch no matter which path is taken. Here we try to find
1670 // and replace that pattern with conditional branch to bb3 and otherwise
1671 // fallthrough to bb2. This is generally better for branch predictors.
1672
1673 MachineBasicBlock *MBB = MI.getParent();
1675 if (BrIt == MBB->begin())
1676 return false;
1677 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1678
1679 BrCond = &*std::prev(BrIt);
1680 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1681 return false;
1682
1683 // Check that the next block is the conditional branch target. Also make sure
1684 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1685 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1686 return BrCondTarget != MI.getOperand(0).getMBB() &&
1687 MBB->isLayoutSuccessor(BrCondTarget);
1688}
1689
1691 MachineInstr &MI, MachineInstr *&BrCond) const {
1692 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1693 Builder.setInstrAndDebugLoc(*BrCond);
1694 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1695 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1696 // this to i1 only since we might not know for sure what kind of
1697 // compare generated the condition value.
1698 auto True = Builder.buildConstant(
1699 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1700 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1701
1702 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1703 Observer.changingInstr(MI);
1704 MI.getOperand(0).setMBB(FallthroughBB);
1705 Observer.changedInstr(MI);
1706
1707 // Change the conditional branch to use the inverted condition and
1708 // new target block.
1709 Observer.changingInstr(*BrCond);
1710 BrCond->getOperand(0).setReg(Xor.getReg(0));
1711 BrCond->getOperand(1).setMBB(BrTarget);
1712 Observer.changedInstr(*BrCond);
1713}
1714
1716 MachineIRBuilder HelperBuilder(MI);
1717 GISelObserverWrapper DummyObserver;
1718 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1719 return Helper.lowerMemcpyInline(MI) ==
1721}
1722
1724 unsigned MaxLen) const {
1725 MachineIRBuilder HelperBuilder(MI);
1726 GISelObserverWrapper DummyObserver;
1727 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1728 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1730}
1731
1733 const MachineRegisterInfo &MRI,
1734 const APFloat &Val) {
1735 APFloat Result(Val);
1736 switch (MI.getOpcode()) {
1737 default:
1738 llvm_unreachable("Unexpected opcode!");
1739 case TargetOpcode::G_FNEG: {
1740 Result.changeSign();
1741 return Result;
1742 }
1743 case TargetOpcode::G_FABS: {
1744 Result.clearSign();
1745 return Result;
1746 }
1747 case TargetOpcode::G_FCEIL:
1748 Result.roundToIntegral(APFloat::rmTowardPositive);
1749 return Result;
1750 case TargetOpcode::G_FFLOOR:
1751 Result.roundToIntegral(APFloat::rmTowardNegative);
1752 return Result;
1753 case TargetOpcode::G_INTRINSIC_TRUNC:
1754 Result.roundToIntegral(APFloat::rmTowardZero);
1755 return Result;
1756 case TargetOpcode::G_INTRINSIC_ROUND:
1757 Result.roundToIntegral(APFloat::rmNearestTiesToAway);
1758 return Result;
1759 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
1760 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1761 return Result;
1762 case TargetOpcode::G_FRINT:
1763 case TargetOpcode::G_FNEARBYINT:
1764 // Use default rounding mode (round to nearest, ties to even)
1765 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1766 return Result;
1767 case TargetOpcode::G_FPEXT:
1768 case TargetOpcode::G_FPTRUNC: {
1769 bool Unused;
1770 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1772 &Unused);
1773 return Result;
1774 }
1775 case TargetOpcode::G_FSQRT: {
1776 bool Unused;
1778 &Unused);
1779 Result = APFloat(sqrt(Result.convertToDouble()));
1780 break;
1781 }
1782 case TargetOpcode::G_FLOG2: {
1783 bool Unused;
1785 &Unused);
1786 Result = APFloat(log2(Result.convertToDouble()));
1787 break;
1788 }
1789 }
1790 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1791 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and
1792 // `G_FLOG2` reach here.
1793 bool Unused;
1794 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1795 return Result;
1796}
1797
1799 MachineInstr &MI, const ConstantFP *Cst) const {
1800 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1801 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1802 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1803 MI.eraseFromParent();
1804}
1805
1807 PtrAddChain &MatchInfo) const {
1808 // We're trying to match the following pattern:
1809 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1810 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1811 // -->
1812 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1813
1814 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1815 return false;
1816
1817 Register Add2 = MI.getOperand(1).getReg();
1818 Register Imm1 = MI.getOperand(2).getReg();
1819 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1820 if (!MaybeImmVal)
1821 return false;
1822
1823 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1824 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1825 return false;
1826
1827 Register Base = Add2Def->getOperand(1).getReg();
1828 Register Imm2 = Add2Def->getOperand(2).getReg();
1829 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1830 if (!MaybeImm2Val)
1831 return false;
1832
1833 // Check if the new combined immediate forms an illegal addressing mode.
1834 // Do not combine if it was legal before but would get illegal.
1835 // To do so, we need to find a load/store user of the pointer to get
1836 // the access type.
1837 Type *AccessTy = nullptr;
1838 auto &MF = *MI.getMF();
1839 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1840 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1841 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1842 MF.getFunction().getContext());
1843 break;
1844 }
1845 }
1847 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1848 AMNew.BaseOffs = CombinedImm.getSExtValue();
1849 if (AccessTy) {
1850 AMNew.HasBaseReg = true;
1852 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1853 AMOld.HasBaseReg = true;
1854 unsigned AS = MRI.getType(Add2).getAddressSpace();
1855 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1856 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1857 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1858 return false;
1859 }
1860
1861 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1862 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1863 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1864 // largest signed integer that fits into the index type, which is the maximum
1865 // size of allocated objects according to the IR Language Reference.
1866 unsigned PtrAddFlags = MI.getFlags();
1867 unsigned LHSPtrAddFlags = Add2Def->getFlags();
1868 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1869 bool IsInBounds =
1870 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1871 unsigned Flags = 0;
1872 if (IsNoUWrap)
1874 if (IsInBounds) {
1877 }
1878
1879 // Pass the combined immediate to the apply function.
1880 MatchInfo.Imm = AMNew.BaseOffs;
1881 MatchInfo.Base = Base;
1882 MatchInfo.Bank = getRegBank(Imm2);
1883 MatchInfo.Flags = Flags;
1884 return true;
1885}
1886
1888 PtrAddChain &MatchInfo) const {
1889 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1890 MachineIRBuilder MIB(MI);
1891 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1892 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1893 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1894 Observer.changingInstr(MI);
1895 MI.getOperand(1).setReg(MatchInfo.Base);
1896 MI.getOperand(2).setReg(NewOffset.getReg(0));
1897 MI.setFlags(MatchInfo.Flags);
1898 Observer.changedInstr(MI);
1899}
1900
1902 RegisterImmPair &MatchInfo) const {
// Match step for folding two stacked shifts of the same opcode that both have
// constant amounts. On success MatchInfo.Reg is the inner shift's source and
// MatchInfo.Imm is the sum of the two shift amounts.
1903 // We're trying to match the following pattern with any of
1904 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1905 // %t1 = SHIFT %base, G_CONSTANT imm1
1906 // %root = SHIFT %t1, G_CONSTANT imm2
1907 // -->
1908 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1909
1910 unsigned Opcode = MI.getOpcode();
1911 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1912 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1913 Opcode == TargetOpcode::G_USHLSAT) &&
1914 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1915
// Naming note: Shl2 is the root's shifted operand (the inner shift's result)
// and Imm1 is the root's own amount; Imm2 below is the inner shift's amount.
1916 Register Shl2 = MI.getOperand(1).getReg();
1917 Register Imm1 = MI.getOperand(2).getReg();
1918 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1919 if (!MaybeImmVal)
1920 return false;
1921
// The instruction feeding the root must use the same shift opcode.
1922 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1923 if (Shl2Def->getOpcode() != Opcode)
1924 return false;
1925
1926 Register Base = Shl2Def->getOperand(1).getReg();
1927 Register Imm2 = Shl2Def->getOperand(2).getReg();
1928 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1929 if (!MaybeImm2Val)
1930 return false;
1931
1932 // Pass the combined immediate to the apply function.
1933 MatchInfo.Imm =
1934 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1935 MatchInfo.Reg = Base;
1936
1937 // There is no simple replacement for a saturating unsigned left shift that
1938 // exceeds the scalar size.
1939 if (Opcode == TargetOpcode::G_USHLSAT &&
1940 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1941 return false;
1942
1943 return true;
1944}
1945
1947 RegisterImmPair &MatchInfo) const {
// Apply step for the shift immediate-chain combine: makes MI shift
// MatchInfo.Reg by the combined amount MatchInfo.Imm, with special handling
// when that amount reaches or exceeds the scalar width.
1948 unsigned Opcode = MI.getOpcode();
1949 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1950 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1951 Opcode == TargetOpcode::G_USHLSAT) &&
1952 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1953
1954 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1955 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1956 auto Imm = MatchInfo.Imm;
1957
1958 if (Imm >= ScalarSizeInBits) {
1959 // Any logical shift that exceeds scalar size will produce zero.
1960 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1961 Builder.buildConstant(MI.getOperand(0), 0);
1962 MI.eraseFromParent();
1963 return;
1964 }
1965 // Arithmetic shift and saturating signed left shift have no effect beyond
1966 // scalar size.
1967 Imm = ScalarSizeInBits - 1;
1968 }
1969
// Rewrite MI in place; the new amount constant has the same type as the old
// amount operand.
1970 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1971 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1972 Observer.changingInstr(MI);
1973 MI.getOperand(1).setReg(MatchInfo.Reg);
1974 MI.getOperand(2).setReg(NewImm);
1975 Observer.changedInstr(MI);
1976}
1977
1979 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
// Match step for a constant shift of a single-use AND/OR/XOR where one logic
// operand is itself a single-use constant shift with the same opcode. On
// success MatchInfo records the logic instruction, the inner shift, the other
// logic operand and the sum of the two shift amounts.
1980 // We're trying to match the following pattern with any of
1981 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1982 // with any of G_AND/G_OR/G_XOR logic instructions.
1983 // %t1 = SHIFT %X, G_CONSTANT C0
1984 // %t2 = LOGIC %t1, %Y
1985 // %root = SHIFT %t2, G_CONSTANT C1
1986 // -->
1987 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1988 // %t4 = SHIFT %Y, G_CONSTANT C1
1989 // %root = LOGIC %t3, %t4
1990 unsigned ShiftOpcode = MI.getOpcode();
1991 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1992 ShiftOpcode == TargetOpcode::G_ASHR ||
1993 ShiftOpcode == TargetOpcode::G_LSHR ||
1994 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1995 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1996 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1997
1998 // Match a one-use bitwise logic op.
1999 Register LogicDest = MI.getOperand(1).getReg();
2000 if (!MRI.hasOneNonDBGUse(LogicDest))
2001 return false;
2002
2003 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
2004 unsigned LogicOpcode = LogicMI->getOpcode();
2005 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
2006 LogicOpcode != TargetOpcode::G_XOR)
2007 return false;
2008
2009 // Find a matching one-use shift by constant.
// A root shift amount of zero is rejected as well.
2010 const Register C1 = MI.getOperand(2).getReg();
2011 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
2012 if (!MaybeImmVal || MaybeImmVal->Value == 0)
2013 return false;
2014
2015 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
2016
// Helper: succeeds when its argument is a single-use shift with the root's
// opcode and a constant amount, which is returned through ShiftVal. Its MI
// parameter shadows the outer MI.
2017 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
2018 // Shift should match previous one and should be a one-use.
2019 if (MI->getOpcode() != ShiftOpcode ||
2020 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2021 return false;
2022
2023 // Must be a constant.
2024 auto MaybeImmVal =
2025 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
2026 if (!MaybeImmVal)
2027 return false;
2028
// NOTE(review): read with getSExtValue() here while C1Val above uses
// getZExtValue() - confirm the difference is intentional.
2029 ShiftVal = MaybeImmVal->Value.getSExtValue();
2030 return true;
2031 };
2032
2033 // Logic ops are commutative, so check each operand for a match.
2034 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
2035 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
2036 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
2037 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
2038 uint64_t C0Val;
2039
// C0Val is written by whichever matchFirstShift call succeeds; the first
// operand is preferred when both would match.
2040 if (matchFirstShift(LogicMIOp1, C0Val)) {
2041 MatchInfo.LogicNonShiftReg = LogicMIReg2;
2042 MatchInfo.Shift2 = LogicMIOp1;
2043 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
2044 MatchInfo.LogicNonShiftReg = LogicMIReg1;
2045 MatchInfo.Shift2 = LogicMIOp2;
2046 } else
2047 return false;
2048
2049 MatchInfo.ValSum = C0Val + C1Val;
2050
2051 // The fold is not valid if the sum of the shift values exceeds bitwidth.
2052 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
2053 return false;
2054
2055 MatchInfo.Logic = LogicMI;
2056 return true;
2057}
2058
2060 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
// Apply step for the shift-of-shifted-logic combine: builds a shift of the
// inner shift's source by MatchInfo.ValSum, a shift of the other logic operand
// by MI's amount, and the logic op joining them into MI's destination. The old
// inner shift, the old logic instruction and MI are erased.
2061 unsigned Opcode = MI.getOpcode();
2062 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
2063 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
2064 Opcode == TargetOpcode::G_SSHLSAT) &&
2065 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2066
2067 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
2068 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
2069
2070 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
2071
2072 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
2073 Register Shift1 =
2074 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
2075
2076 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
2077 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
2078 // shift1 when building shift2. If we erased MatchInfo.Shift2 at the end, we
2079 // would actually remove the old shift1 and crash later. So erase it earlier
2080 // to avoid the crash.
2081 MatchInfo.Shift2->eraseFromParent();
2082
2083 Register Shift2Const = MI.getOperand(2).getReg();
2084 Register Shift2 = Builder
2085 .buildInstr(Opcode, {DestType},
2086 {MatchInfo.LogicNonShiftReg, Shift2Const})
2087 .getReg(0);
2088
2089 Register Dest = MI.getOperand(0).getReg();
2090 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2091
2092 // This was one use so it's safe to remove it.
2093 MatchInfo.Logic->eraseFromParent();
2094
2095 MI.eraseFromParent();
2096}
2097
2099 BuildFnTy &MatchInfo) const {
// Match step for commuting a G_SHL by a constant with the G_ADD/G_OR that
// feeds it, when that add/or has a constant (or splat-constant) operand. On
// success MatchInfo is a callback that emits the two shifts and the add/or
// into the original destination register.
2100 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2101 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2102 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2103 auto &Shl = cast<GenericMachineInstr>(MI);
2104 Register DstReg = Shl.getReg(0);
2105 Register SrcReg = Shl.getReg(1);
2106 Register ShiftReg = Shl.getReg(2);
2107 Register X, C1;
2108
// Let the target veto the transform.
2109 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2110 return false;
2111
// NOTE(review): listing line 2113 (the first half of this matcher expression)
// is not visible here - confirm against the upstream file.
2112 if (!mi_match(SrcReg, MRI,
2114 m_GOr(m_Reg(X), m_Reg(C1))))))
2115 return false;
2116
// Both the add/or operand C1 and the shift amount must be constants or splats.
2117 APInt C1Val, C2Val;
2118 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2119 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2120 return false;
2121
2122 auto *SrcDef = MRI.getVRegDef(SrcReg);
2123 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2124 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2125 LLT SrcTy = MRI.getType(SrcReg);
// The callback captures by value, including the SrcDef pointer whose opcode it
// reads when it runs.
2126 MatchInfo = [=](MachineIRBuilder &B) {
2127 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2128 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2129 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2130 };
2131 return true;
2132}
2133
2135 LshrOfTruncOfLshr &MatchInfo,
2136 MachineInstr &ShiftMI) const {
// Match step combining the G_LSHR MI with a second shift ShiftMI supplied by
// the caller; both amounts must be constants or splat constants.
// NOTE(review): presumably ShiftMI is the inner G_LSHR that reaches MI through
// a G_TRUNC - confirm against the pattern definition.
2137 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2138
2139 Register N0 = MI.getOperand(1).getReg();
2140 Register N1 = MI.getOperand(2).getReg();
2141 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2142
2143 APInt N1C, N001C;
2144 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2145 return false;
2146 auto N001 = ShiftMI.getOperand(2).getReg();
2147 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2148 return false;
2149
// Zero-extend the narrower amount so the two can be added.
2150 if (N001C.getBitWidth() > N1C.getBitWidth())
2151 N1C = N1C.zext(N001C.getBitWidth());
2152 else
2153 N001C = N001C.zext(N1C.getBitWidth());
2154
2155 Register InnerShift = ShiftMI.getOperand(0).getReg();
2156 LLT InnerShiftTy = MRI.getType(InnerShift);
2157 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
// Only fold when the combined amount stays below the inner shift's width.
2158 if ((N1C + N001C).ult(InnerShiftSize)) {
2159 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2160 MatchInfo.ShiftAmt = N1C + N001C;
2161 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2162 MatchInfo.InnerShiftTy = InnerShiftTy;
2163
// When the inner amount plus MI's scalar width equals the inner width, succeed
// without requesting a mask (MatchInfo.Mask is not written on this path).
2164 if ((N001C + OpSizeInBits) == InnerShiftSize)
2165 return true;
// Otherwise request a mask, but only if N0 and the inner shift result each
// have a single use.
2166 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2167 MatchInfo.Mask = true;
2168 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2169 return true;
2170 }
2171 }
2172 return false;
2173}
2174
2176 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
// Apply step: builds one G_LSHR of MatchInfo.Src by MatchInfo.ShiftAmt in
// MatchInfo.InnerShiftTy, optionally ANDs it with a mask, truncates the result
// into MI's destination and erases MI.
2177 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2178
2179 Register Dst = MI.getOperand(0).getReg();
2180 auto ShiftAmt =
2181 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2182 auto Shift =
2183 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2184 if (MatchInfo.Mask == true) {
// NOTE(review): listing line 2186 (the start of this initializer) is not
// visible here - confirm against the upstream file.
2185 APInt MaskVal =
2187 MatchInfo.MaskVal.getZExtValue());
2188 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2189 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2190 Builder.buildTrunc(Dst, And);
2191 } else
2192 Builder.buildTrunc(Dst, Shift);
2193 MI.eraseFromParent();
2194}
2195
2197 unsigned &ShiftVal) const {
// Match step: looks for a constant in the G_MUL's operand 2 and stores the
// result of exactLogBase2() on it in ShiftVal. The match succeeds unless that
// result is -1.
2198 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2199 auto MaybeImmVal =
2200 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2201 if (!MaybeImmVal)
2202 return false;
2203
// ShiftVal is unsigned, so it is cast back to a signed value to compare
// against -1.
2204 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2205 return (static_cast<int32_t>(ShiftVal) != -1);
2206}
2207
2209 unsigned &ShiftVal) const {
// Apply step: turns the G_MUL into a G_SHL in place, replacing operand 2 with
// a new constant ShiftVal of the destination's type.
2210 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2211 MachineIRBuilder MIB(MI);
2212 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2213 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2214 Observer.changingInstr(MI);
2215 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2216 MI.getOperand(2).setReg(ShiftCst.getReg(0));
// NOTE(review): listing line 2218 (the statement guarded by this `if`) is not
// visible here - confirm against the upstream file.
2217 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2219 Observer.changedInstr(MI);
2220}
2221
2223 BuildFnTy &MatchInfo) const {
// Match step for rewriting a G_SUB whose right-hand side is a constant as a
// G_ADD of the negated constant. On success MatchInfo is a callback that
// mutates MI in place.
2224 GSub &Sub = cast<GSub>(MI);
2225
2226 LLT Ty = MRI.getType(Sub.getReg(0));
2227
// G_ADD on this type must be legal, or we must be before the legalizer.
2228 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2229 return false;
2230
// NOTE(review): listing line 2231 (the condition guarding this return) is not
// visible here - confirm against the upstream file.
2232 return false;
2233
2234 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2235
// MI is captured by reference; everything else is captured by value.
2236 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2237 auto NegCst = B.buildConstant(Ty, -Imm);
2238 Observer.changingInstr(MI);
2239 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2240 MI.getOperand(2).setReg(NegCst.getReg(0));
// NOTE(review): listing lines 2241 and 2243 (including the statement guarded
// by this `if`) are not visible here - confirm against the upstream file.
2242 if (Imm.isMinSignedValue())
2244 Observer.changedInstr(MI);
2245 };
2246 return true;
2247}
2248
2249 // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
2251 RegisterImmPair &MatchData) const {
// Match step: on success MatchData.Reg is the extension's source and
// MatchData.Imm is the constant shift amount.
2252 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2253 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2254 return false;
2255
2256 Register LHS = MI.getOperand(1).getReg();
2257
// The shifted value must be a G_ANYEXT, G_ZEXT or G_SEXT.
2258 Register ExtSrc;
2259 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2260 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2261 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2262 return false;
2263
// The shift amount must be a constant or a constant splat vector.
2264 Register RHS = MI.getOperand(2).getReg();
2265 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2266 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2267 if (!MaybeShiftAmtVal)
2268 return false;
2269
2270 if (LI) {
2271 LLT SrcTy = MRI.getType(ExtSrc);
2272
2273 // We only really care about the legality with the shifted value. We can
2274 // pick any type for the constant shift amount, so ask the target what to
2275 // use. Otherwise we would have to guess and hope it is reported as legal.
2276 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2277 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2278 return false;
2279 }
2280
2281 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2282 MatchData.Reg = ExtSrc;
2283 MatchData.Imm = ShiftAmt;
2284
// MatchData has already been written above, even when this returns false.
// The match needs at least ShiftAmt known-zero leading bits in the narrow
// source, and a shift amount below the narrow scalar width.
2285 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2286 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2287 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2288}
2289
2291 MachineInstr &MI, const RegisterImmPair &MatchData) const {
// Apply step: performs the shift in the narrow source type, carrying over MI's
// flags, then zero-extends the result into MI's destination and erases MI.
2292 Register ExtSrcReg = MatchData.Reg;
2293 int64_t ShiftAmtVal = MatchData.Imm;
2294
// The amount constant is built in the same type as the narrow source.
2295 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2296 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2297 auto NarrowShift =
2298 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2299 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2300 MI.eraseFromParent();
2301}
2302
2304 Register &MatchInfo) const {
// Match step: succeeds when the merge's sources are exactly the results of a
// single unmerge, in the same order. MatchInfo receives that unmerge's source.
// NOTE(review): listing line 2305, which declares `Merge`, is not visible
// here - confirm against the upstream file.
2306 SmallVector<Register, 16> MergedValues;
2307 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2308 MergedValues.emplace_back(Merge.getSourceReg(I));
2309
// The first source must come from an unmerge with a matching number of
// results.
2310 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2311 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2312 return false;
2313
// Every source must be the unmerge result at the same index.
2314 for (unsigned I = 0; I < MergedValues.size(); ++I)
2315 if (MergedValues[I] != Unmerge->getReg(I))
2316 return false;
2317
2318 MatchInfo = Unmerge->getSourceReg();
2319 return true;
2320}
2321
2323 const MachineRegisterInfo &MRI) {
// Follows a chain of G_BITCASTs starting at Reg and returns the register at
// the far end. The loop body is empty because m_Reg(Reg) rebinds Reg on each
// successful match.
2324 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2325 ;
2326
2327 return Reg;
2328}
2329
2331 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
// Match step: succeeds when the unmerge's source, seen through any bitcasts,
// is a merge-like instruction whose source size equals the unmerge's result
// size. The merge's sources are appended to Operands.
2332 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2333 "Expected an unmerge");
2334 auto &Unmerge = cast<GUnmerge>(MI);
2335 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2336
2337 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2338 if (!SrcInstr)
2339 return false;
2340
2341 // Check the source type of the merge.
2342 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2343 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2344 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2345 if (SrcMergeTy != Dst0Ty && !SameSize)
2346 return false;
2347 // They are the same now (modulo a bitcast).
2348 // We can collect all the src registers.
2349 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2350 Operands.push_back(SrcInstr->getSourceReg(Idx));
2351 return true;
2352}
2353
2355 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
// Apply step: replaces each unmerge result with the matching entry of
// Operands, either directly when the types agree or through a cast, then
// erases the unmerge.
2356 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2357 "Expected an unmerge");
2358 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2359 "Not enough operands to replace all defs");
2360 unsigned NumElems = MI.getNumOperands() - 1;
2361
// Types are compared once, using the first operand and the first result.
2362 LLT SrcTy = MRI.getType(Operands[0]);
2363 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2364 bool CanReuseInputDirectly = DstTy == SrcTy;
2365 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2366 Register DstReg = MI.getOperand(Idx).getReg();
2367 Register SrcReg = Operands[Idx];
2368
2369 // This combine may run after RegBankSelect, so we need to be aware of
2370 // register banks.
// If the destination has a class/bank that differs from the source's, go
// through a copy that carries the destination's class/bank.
2371 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2372 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2373 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2374 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2375 }
2376
2377 if (CanReuseInputDirectly)
2378 replaceRegWith(MRI, DstReg, SrcReg);
2379 else
2380 Builder.buildCast(DstReg, SrcReg);
2381 }
2382 MI.eraseFromParent();
2383}
2384
2386 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
// Match step: succeeds when MI's last operand is defined by a G_CONSTANT or
// G_FCONSTANT. The constant's bits are split into pieces as wide as the first
// result, and the pieces are appended to Csts starting with the lowest bits.
2387 unsigned SrcIdx = MI.getNumOperands() - 1;
2388 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2389 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2390 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2391 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2392 return false;
2393 // Break down the big constant in smaller ones.
// A floating-point constant is handled through its bit pattern.
2394 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2395 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2396 ? CstVal.getCImm()->getValue()
2397 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2398
2399 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2400 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2401 // Unmerge a constant.
// One piece per result: take the low ShiftAmt bits, then shift them out.
2402 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2403 Csts.emplace_back(Val.trunc(ShiftAmt));
2404 Val = Val.lshr(ShiftAmt);
2405 }
2406
2407 return true;
2408}
2409
2411 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
// Apply step: defines each unmerge result as the constant at the same index
// in Csts, then erases the unmerge.
2412 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2413 "Expected an unmerge");
2414 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2415 "Not enough operands to replace all defs");
2416 unsigned NumElems = MI.getNumOperands() - 1;
2417 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2418 Register DstReg = MI.getOperand(Idx).getReg();
2419 Builder.buildConstant(DstReg, Csts[Idx]);
2420 }
2421
2422 MI.eraseFromParent();
2423}
2424
2427 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match step: succeeds when MI's last operand is defined by a GImplicitDef.
// MatchInfo is assigned unconditionally; its callback defines every operand
// except the last as undef.
2428 unsigned SrcIdx = MI.getNumOperands() - 1;
2429 Register SrcReg = MI.getOperand(SrcIdx).getReg();
// The callback holds MI by reference.
2430 MatchInfo = [&MI](MachineIRBuilder &B) {
2431 unsigned NumElems = MI.getNumOperands() - 1;
2432 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2433 Register DstReg = MI.getOperand(Idx).getReg();
2434 B.buildUndef(DstReg);
2435 }
2436 };
2437 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2438}
2439
2441 MachineInstr &MI) const {
// Match step: succeeds for an unmerge whose first result and source are both
// non-vector and whose other results have no non-debug uses.
2442 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2443 "Expected an unmerge");
// Operand MI.getNumDefs() is the first operand after the definitions.
2444 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2445 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2446 return false;
2447 // Check that all the lanes are dead except the first one.
2448 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2449 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2450 return false;
2451 }
2452 return true;
2453}
2454
2456 MachineInstr &MI) const {
// Apply step: defines the first result as a truncation of the operand that
// follows the definitions, then erases MI.
2457 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2458 Register Dst0Reg = MI.getOperand(0).getReg();
2459 Builder.buildTrunc(Dst0Reg, SrcReg);
2460 MI.eraseFromParent();
2461}
2462
// Match step: succeeds for a scalar unmerge whose source is a G_ZEXT of a
// value no wider than the first result.
2464 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2465 "Expected an unmerge");
2466 Register Dst0Reg = MI.getOperand(0).getReg();
2467 LLT Dst0Ty = MRI.getType(Dst0Reg);
2468 // G_ZEXT on vector applies to each lane, so it will
2469 // affect all destinations. Therefore we won't be able
2470 // to simplify the unmerge to just the first definition.
2471 if (Dst0Ty.isVector())
2472 return false;
2473 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2474 LLT SrcTy = MRI.getType(SrcReg);
2475 if (SrcTy.isVector())
2476 return false;
2477
2478 Register ZExtSrcReg;
2479 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2480 return false;
2481
2482 // Finally we can replace the first definition with
2483 // a zext of the source if the definition is big enough to hold
2484 // all of ZExtSrc bits.
2485 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2486 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2487}
2488
// Apply step: the first result becomes the zext source (zero-extended again
// if the result is wider), every other result becomes zero, and the unmerge
// is erased.
2490 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2491 "Expected an unmerge");
2492
2493 Register Dst0Reg = MI.getOperand(0).getReg();
2494
2495 MachineInstr *ZExtInstr =
2496 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2497 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2498 "Expecting a G_ZEXT");
2499
2500 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2501 LLT Dst0Ty = MRI.getType(Dst0Reg);
2502 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2503
// Wider result: zero-extend the source into it. Equal size: reuse the source
// register directly.
2504 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2505 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2506 } else {
2507 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2508 "ZExt src doesn't fit in destination");
2509 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2510 }
2511
// The zero constant is built on first use and shared by the other results.
2512 Register ZeroReg;
2513 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2514 if (!ZeroReg)
2515 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2516 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2517 }
2518 MI.eraseFromParent();
2519}
2520
2522 unsigned TargetShiftSize,
2523 unsigned &ShiftVal) const {
// Match step: succeeds for a non-vector G_SHL/G_LSHR/G_ASHR wider than
// TargetShiftSize whose constant amount lies in [Size / 2, Size). ShiftVal
// receives that amount.
2524 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2525 MI.getOpcode() == TargetOpcode::G_LSHR ||
2526 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2527
2528 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2529 if (Ty.isVector()) // TODO:
2530 return false;
2531
2532 // Don't narrow further than the requested size.
2533 unsigned Size = Ty.getSizeInBits();
2534 if (Size <= TargetShiftSize)
2535 return false;
2536
2537 auto MaybeImmVal =
2538 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2539 if (!MaybeImmVal)
2540 return false;
2541
// ShiftVal is written before the range check, so it is set even when the
// check below fails.
2542 ShiftVal = MaybeImmVal->Value.getSExtValue();
2543 return ShiftVal >= Size / 2 && ShiftVal < Size;
2544}
2545
2547 MachineInstr &MI, const unsigned &ShiftVal) const {
// Apply step: splits the shifted value into two half-width pieces, performs
// the shift on one half, merges the result back into MI's destination and
// erases MI. Requires ShiftVal >= half the width.
2548 Register DstReg = MI.getOperand(0).getReg();
2549 Register SrcReg = MI.getOperand(1).getReg();
2550 LLT Ty = MRI.getType(SrcReg);
2551 unsigned Size = Ty.getSizeInBits();
2552 unsigned HalfSize = Size / 2;
2553 assert(ShiftVal >= HalfSize);
2554
2555 LLT HalfTy = LLT::scalar(HalfSize);
2556
// Unmerge result 0 is the low half and result 1 is the high half.
2557 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2558 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2559
2560 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2561 Register Narrowed = Unmerge.getReg(1);
2562
2563 // dst = G_LSHR s64:x, C for C >= 32
2564 // =>
2565 // lo, hi = G_UNMERGE_VALUES x
2566 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2567
// When the remaining amount is zero the half is used unshifted.
2568 if (NarrowShiftAmt != 0) {
2569 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2570 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2571 }
2572
2573 auto Zero = Builder.buildConstant(HalfTy, 0);
2574 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2575 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2576 Register Narrowed = Unmerge.getReg(0);
2577 // dst = G_SHL s64:x, C for C >= 32
2578 // =>
2579 // lo, hi = G_UNMERGE_VALUES x
2580 // dst = G_MERGE_VALUES 0, (G_SHL lo, C - 32)
2581 if (NarrowShiftAmt != 0) {
2582 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2583 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2584 }
2585
2586 auto Zero = Builder.buildConstant(HalfTy, 0);
2587 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2588 } else {
2589 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
// Hi is the high half shifted right arithmetically by HalfSize - 1.
2590 auto Hi = Builder.buildAShr(
2591 HalfTy, Unmerge.getReg(1),
2592 Builder.buildConstant(HalfTy, HalfSize - 1));
2593
2594 if (ShiftVal == HalfSize) {
2595 // (G_ASHR i64:x, 32) ->
2596 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2597 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2598 } else if (ShiftVal == Size - 1) {
2599 // Don't need a second shift.
2600 // (G_ASHR i64:x, 63) ->
2601 // %narrowed = (G_ASHR hi_32(x), 31)
2602 // G_MERGE_VALUES %narrowed, %narrowed
2603 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2604 } else {
2605 auto Lo = Builder.buildAShr(
2606 HalfTy, Unmerge.getReg(1),
2607 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2608
2609 // (G_ASHR i64:x, C) ->, for C >= 32
2610 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2611 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2612 }
2613 }
2614
2615 MI.eraseFromParent();
2616}
2617
2619 MachineInstr &MI, unsigned TargetShiftAmount) const {
// Convenience wrapper: runs matchCombineShiftToUnmerge and, on success,
// applyCombineShiftToUnmerge. Returns whether the combine was applied.
2620 unsigned ShiftAmt;
2621 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2622 applyCombineShiftToUnmerge(MI, ShiftAmt);
2623 return true;
2624 }
2625
2626 return false;
2627}
2628
2630 Register &Reg) const {
// Match step: succeeds when the G_INTTOPTR's source is a G_PTRTOINT of a
// register whose type equals the G_INTTOPTR's result type. Reg receives that
// register.
2631 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2632 Register DstReg = MI.getOperand(0).getReg();
2633 LLT DstTy = MRI.getType(DstReg);
2634 Register SrcReg = MI.getOperand(1).getReg();
2635 return mi_match(SrcReg, MRI,
2636 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2637}
2638
2640 Register &Reg) const {
// Apply step: replaces the G_INTTOPTR with a copy of Reg into its destination.
2641 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2642 Register DstReg = MI.getOperand(0).getReg();
2643 Builder.buildCopy(DstReg, Reg);
2644 MI.eraseFromParent();
2645}
2646
2648 Register &Reg) const {
// Apply step: replaces the G_PTRTOINT with a zero-extension or truncation of
// Reg into its destination.
2649 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2650 Register DstReg = MI.getOperand(0).getReg();
2651 Builder.buildZExtOrTrunc(DstReg, Reg);
2652 MI.eraseFromParent();
2653}
2654
2656 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
// Match step: succeeds when either G_ADD operand is a G_PTRTOINT of a pointer
// whose scalar size equals the integer's. PtrReg.first receives the pointer
// and PtrReg.second is true when it was found on the right-hand side.
2657 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2658 Register LHS = MI.getOperand(1).getReg();
2659 Register RHS = MI.getOperand(2).getReg();
2660 LLT IntTy = MRI.getType(LHS);
2661
2662 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2663 // instruction.
2664 PtrReg.second = false;
2665 for (Register SrcReg : {LHS, RHS}) {
2666 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2667 // Don't handle cases where the integer is implicitly converted to the
2668 // pointer width.
2669 LLT PtrTy = MRI.getType(PtrReg.first);
2670 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2671 return true;
2672 }
2673
// LHS did not match; any match from here on is on RHS and needs a commute.
2674 PtrReg.second = true;
2675 }
2676
2677 return false;
2678}
2679
2681 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
// Apply step: replaces the G_ADD with a G_PTR_ADD on PtrReg.first followed by
// a G_PTRTOINT into the original destination.
2682 Register Dst = MI.getOperand(0).getReg();
2683 Register LHS = MI.getOperand(1).getReg();
2684 Register RHS = MI.getOperand(2).getReg();
2685
// After the optional swap, RHS is the integer operand and LHS is overwritten
// with the matched pointer.
2686 const bool DoCommute = PtrReg.second;
2687 if (DoCommute)
2688 std::swap(LHS, RHS);
2689 LHS = PtrReg.first;
2690
2691 LLT PtrTy = MRI.getType(LHS);
2692
2693 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2694 Builder.buildPtrToInt(Dst, PtrAdd);
2695 MI.eraseFromParent();
2696}
2697
2699 APInt &NewCst) const {
// Match step: succeeds when the pointer add has a constant offset and its
// base is a G_INTTOPTR of a constant. NewCst receives the folded value, sized
// to the result type.
2700 auto &PtrAdd = cast<GPtrAdd>(MI);
2701 Register LHS = PtrAdd.getBaseReg();
2702 Register RHS = PtrAdd.getOffsetReg();
2703 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2704
2705 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2706 APInt Cst;
2707 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2708 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2709 // G_INTTOPTR uses zero-extension
// The base is zero-extended or truncated, the offset sign-extended or
// truncated, both to the result width.
2710 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2711 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2712 return true;
2713 }
2714 }
2715
2716 return false;
2717}
2718
2720 APInt &NewCst) const {
// Apply step: defines the pointer add's result as the constant NewCst and
// erases the pointer add.
2721 auto &PtrAdd = cast<GPtrAdd>(MI);
2722 Register Dst = PtrAdd.getReg(0);
2723
2724 Builder.buildConstant(Dst, NewCst);
2725 PtrAdd.eraseFromParent();
2726}
2727
2729 Register &Reg) const {
// Match step: succeeds when the G_ANYEXT's source, looking through copies, is
// a G_TRUNC of a register that has the G_ANYEXT's result type and can replace
// its destination. Reg receives that register.
2730 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2731 Register DstReg = MI.getOperand(0).getReg();
2732 Register SrcReg = MI.getOperand(1).getReg();
// Use the copy-stripped source only when the lookup returned a valid register.
2733 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2734 if (OriginalSrcReg.isValid())
2735 SrcReg = OriginalSrcReg;
2736 LLT DstTy = MRI.getType(DstReg);
2737 return mi_match(SrcReg, MRI,
2738 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2739 canReplaceReg(DstReg, Reg, MRI);
2740}
2741
2743 Register &Reg) const {
// Match step: succeeds when the G_ZEXT's source is a G_TRUNC of a register
// with the G_ZEXT's result type that can replace its destination, and enough
// of that register's leading bits are known to be zero. Reg receives it.
2744 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2745 Register DstReg = MI.getOperand(0).getReg();
2746 Register SrcReg = MI.getOperand(1).getReg();
2747 LLT DstTy = MRI.getType(DstReg);
2748 if (mi_match(SrcReg, MRI,
2749 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2750 canReplaceReg(DstReg, Reg, MRI)) {
// At least DstSize - SrcSize leading bits of Reg must be known zero.
2751 unsigned DstSize = DstTy.getScalarSizeInBits();
2752 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2753 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2754 }
2755 return false;
2756}
2757
// Picks the type to perform a right shift in, given the shift's current type
// (ShiftTy) and the type it is truncated to (TruncTy). Returns ShiftTy itself
// when no narrowing should happen.
2759 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2760 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2761
2762 // ShiftTy > 32 > TruncTy -> 32
2763 if (ShiftSize > 32 && TruncSize < 32)
2764 return ShiftTy.changeElementSize(32);
2765
2766 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2767 // Some targets like it, some don't, some only like it under certain
2768 // conditions/processor versions, etc.
2769 // A TL hook might be needed for this.
2770
2771 // Don't combine
2772 return ShiftTy;
2773}
2774
2776 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
// Match step for a G_TRUNC of a single-use shift. On success MatchInfo holds
// the shift instruction and the narrower type to perform it in.
2777 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2778 Register DstReg = MI.getOperand(0).getReg();
2779 Register SrcReg = MI.getOperand(1).getReg();
2780
2781 if (!MRI.hasOneNonDBGUse(SrcReg))
2782 return false;
2783
2784 LLT SrcTy = MRI.getType(SrcReg);
2785 LLT DstTy = MRI.getType(DstReg);
2786
2787 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2788 const auto &TL = getTargetLowering();
2789
2790 LLT NewShiftTy;
2791 switch (SrcMI->getOpcode()) {
2792 default:
2793 return false;
2794 case TargetOpcode::G_SHL: {
// A left shift is moved directly into the truncated type.
2795 NewShiftTy = DstTy;
2796
2797 // Make sure new shift amount is legal.
2798 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2799 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2800 return false;
2801 break;
2802 }
2803 case TargetOpcode::G_LSHR:
2804 case TargetOpcode::G_ASHR: {
2805 // For right shifts, we conservatively do not do the transform if the TRUNC
2806 // has any STORE users. The reason is that if we change the type of the
2807 // shift, we may break the truncstore combine.
2808 //
2809 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2810 for (auto &User : MRI.use_instructions(DstReg))
2811 if (User.getOpcode() == TargetOpcode::G_STORE)
2812 return false;
2813
// A result equal to SrcTy means there is no intermediate type to narrow to.
2814 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2815 if (NewShiftTy == SrcTy)
2816 return false;
2817
2818 // Make sure we won't lose information by truncating the high bits.
2819 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2820 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2821 DstTy.getScalarSizeInBits()))
2822 return false;
2823 break;
2824 }
2825 }
2826
// NOTE(review): listing line 2827 (the call that opens this condition) is not
// visible here - confirm against the upstream file.
2828 {SrcMI->getOpcode(),
2829 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2830 return false;
2831
2832 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2833 return true;
2834}
2835
2837 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
// Apply step: truncates the shift's source to the new type, rebuilds the
// shift in that type with the original amount register, and routes the result
// to MI's destination before erasing MI.
2838 MachineInstr *ShiftMI = MatchInfo.first;
2839 LLT NewShiftTy = MatchInfo.second;
2840
2841 Register Dst = MI.getOperand(0).getReg();
2842 LLT DstTy = MRI.getType(Dst);
2843
2844 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2845 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2846 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2847
2848 Register NewShift =
2849 Builder
2850 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2851 .getReg(0);
2852
// Use the new shift directly when it already has the destination type;
// otherwise truncate it once more.
2853 if (NewShiftTy == DstTy)
2854 replaceRegWith(MRI, Dst, NewShift);
2855 else
2856 Builder.buildTrunc(Dst, NewShift);
2857
2858 eraseInst(MI);
2859}
2860
// Reports whether at least one explicit use operand of MI is a register for
// which getOpcodeDef finds a G_IMPLICIT_DEF definition.
2862 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2863 return MO.isReg() &&
2864 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2865 });
2866}
2867
// Reports whether every explicit use operand of MI is either not a register
// or a register for which getOpcodeDef finds a G_IMPLICIT_DEF definition.
2869 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2870 return !MO.isReg() ||
2871 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2872 });
2873}
2874
// For a G_SHUFFLE_VECTOR, reports whether every entry of the shuffle mask
// (operand 3) is negative (presumably the encoding of an undefined lane --
// TODO confirm).
2876 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2877 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2878 return all_of(Mask, [](int Elt) { return Elt < 0; });
2879}
2880
// For a G_STORE, reports whether operand 0 is defined by a G_IMPLICIT_DEF
// (as found by getOpcodeDef).
2882 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2883 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2884 MRI);
2885}
2886
// For a G_SELECT, reports whether operand 1 is defined by a G_IMPLICIT_DEF
// (as found by getOpcodeDef).
2888 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2889 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2890 MRI);
2891}
2892
2894 MachineInstr &MI) const {
// Reports whether a G_INSERT_VECTOR_ELT / G_EXTRACT_VECTOR_ELT uses a constant
// index that is >= the element count of the vector in operand 1. Scalable
// vectors and non-constant indices never match.
2895 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2896 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2897 "Expected an insert/extract element op");
2898 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2899 if (VecTy.isScalableVector())
2900 return false;
2901
// The index is operand 2 for an extract and operand 3 for an insert.
2902 unsigned IdxIdx =
2903 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2904 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2905 if (!Idx)
2906 return false;
2907 return Idx->getZExtValue() >= VecTy.getNumElements();
2908}
2909
2911 unsigned &OpIdx) const {
// If the select's condition register is defined by a constant (or constant
// splat vector), stores in OpIdx the operand index chosen by that constant:
// 3 when the constant is zero, 2 otherwise. Returns false when the condition
// is not such a constant.
2912 GSelect &SelMI = cast<GSelect>(MI);
2913 auto Cst =
2914 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2915 if (!Cst)
2916 return false;
2917 OpIdx = Cst->isZero() ? 3 : 2;
2918 return true;
2919}
2920
2921void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2922
2924 const MachineOperand &MOP2) const {
// Reports whether the two operands are registers that can be shown to hold
// the same value, looking through copies to their defining instructions.
// Returns false whenever equality cannot be established.
2925 if (!MOP1.isReg() || !MOP2.isReg())
2926 return false;
2927 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2928 if (!InstAndDef1)
2929 return false;
2930 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2931 if (!InstAndDef2)
2932 return false;
2933 MachineInstr *I1 = InstAndDef1->MI;
2934 MachineInstr *I2 = InstAndDef2->MI;
2935
2936 // Handle a case like this:
2937 //
2938 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2939 //
2940 // Even though %0 and %1 are produced by the same instruction they are not
2941 // the same values.
2942 if (I1 == I2)
2943 return MOP1.getReg() == MOP2.getReg();
2944
2945 // If we have an instruction which loads or stores, we can't guarantee that
2946 // it is identical.
2947 //
2948 // For example, we may have
2949 //
2950 // %x1 = G_LOAD %addr (load N from @somewhere)
2951 // ...
2952 // call @foo
2953 // ...
2954 // %x2 = G_LOAD %addr (load N from @somewhere)
2955 // ...
2956 // %or = G_OR %x1, %x2
2957 //
2958 // It's possible that @foo will modify whatever lives at the address we're
2959 // loading from. To be safe, let's just assume that all loads and stores
2960 // are different (unless we have something which is guaranteed to not
2961 // change.)
2962 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2963 return false;
2964
2965 // If both instructions are loads or stores, they are equal only if both
2966 // are dereferenceable invariant loads with the same number of bits.
2967 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
// NOTE(review): LS1 and LS2 are declared on lines that are not present in
// this listing.
2970 if (!LS1 || !LS2)
2971 return false;
2972
2973 if (!I2->isDereferenceableInvariantLoad() ||
2974 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2975 return false;
2976 }
2977
2978 // Check for physical registers on the instructions first to avoid cases
2979 // like this:
2980 //
2981 // %a = COPY $physreg
2982 // ...
2983 // SOMETHING implicit-def $physreg
2984 // ...
2985 // %b = COPY $physreg
2986 //
2987 // These copies are not equivalent.
2988 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2989 return MO.isReg() && MO.getReg().isPhysical();
2990 })) {
2991 // Check if we have a case like this:
2992 //
2993 // %a = COPY $physreg
2994 // %b = COPY %a
2995 //
2996 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2997 // From that, we know that they must have the same value, since they must
2998 // have come from the same COPY.
2999 return I1->isIdenticalTo(*I2);
3000 }
3001
3002 // We don't have any physical registers, so we don't necessarily need the
3003 // same vreg defs.
3004 //
3005 // On the off-chance that there's some target instruction feeding into the
3006 // instruction, let's use produceSameValue instead of isIdenticalTo.
3007 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
3008 // Handle instructions with multiple defs that produce same values. Values
3009 // are same for operands with same index.
3010 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
3011 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
3012 // I1 and I2 are different instructions but produce same values,
3013 // %1 and %6 are same, %1 and %7 are not the same value.
3014 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
3015 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
3016 }
3017 return false;
3018}
3019
3021 int64_t C) const {
// Reports whether MOP is a register whose defining instruction is a constant
// or constant splat vector that is at most 64 bits wide and whose
// sign-extended value equals C.
3022 if (!MOP.isReg())
3023 return false;
3024 auto *MI = MRI.getVRegDef(MOP.getReg());
3025 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
// The width check comes first so getSExtValue() is only called on values of
// 64 bits or fewer.
3026 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
3027 MaybeCst->getSExtValue() == C;
3028}
3029
3031 double C) const {
// Reports whether MOP is a register matching m_GFCstOrSplat (a floating-point
// constant or splat) whose value is exactly C.
3032 if (!MOP.isReg())
3033 return false;
3034 std::optional<FPValueAndVReg> MaybeCst;
3035 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
3036 return false;
3037
3038 return MaybeCst->Value.isExactlyValue(C);
3039}
3040
3042 unsigned OpIdx) const {
// Replaces the single explicit def of MI (operand 0) with the register in
// operand OpIdx of MI, then erases MI. Asserts that the replacement is
// permitted by canReplaceReg.
3043 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3044 Register OldReg = MI.getOperand(0).getReg();
3045 Register Replacement = MI.getOperand(OpIdx).getReg();
3046 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3047 replaceRegWith(MRI, OldReg, Replacement);
3048 MI.eraseFromParent();
3049}
3050
3052 Register Replacement) const {
// Replaces the single explicit def of MI (operand 0) with Replacement, then
// erases MI. Asserts that the replacement is permitted by canReplaceReg.
3053 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3054 Register OldReg = MI.getOperand(0).getReg();
3055 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3056 replaceRegWith(MRI, OldReg, Replacement);
3057 MI.eraseFromParent();
3058}
3059
3061 unsigned ConstIdx) const {
// Reports whether operand ConstIdx of MI is an integer constant (found via
// getIConstantVRegValWithLookThrough) whose unsigned value is >= the size in
// bits of MI's destination type.
3062 Register ConstReg = MI.getOperand(ConstIdx).getReg();
3063 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3064
3065 // Get the shift amount
3066 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3067 if (!VRegAndVal)
3068 return false;
3069
3070 // Return true if shift amount >= Bitwidth
3071 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
3072}
3073
// Rebuilds a G_FSHL / G_FSHR whose shift amount (operand 3) is a constant so
// that the amount becomes the old amount modulo the destination's scalar bit
// width. The original instruction is erased.
3075 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
3076 MI.getOpcode() == TargetOpcode::G_FSHR) &&
3077 "This is not a funnel shift operation");
3078
3079 Register ConstReg = MI.getOperand(3).getReg();
3080 LLT ConstTy = MRI.getType(ConstReg);
3081 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3082
// The amount must already be known to be constant; this is only asserted.
3083 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3084 assert((VRegAndVal) && "Value is not a constant");
3085
3086 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
3087 APInt NewConst = VRegAndVal->Value.urem(
3088 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3089
// Emit the reduced amount and a new funnel shift that defines the same
// destination operand with the same first two sources.
3090 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3091 Builder.buildInstr(
3092 MI.getOpcode(), {MI.getOperand(0)},
3093 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3094
3095 MI.eraseFromParent();
3096}
3097
// For a G_SELECT, reports whether operands 2 and 3 are equal according to
// matchEqualDefs and operand 0 may be replaced by operand 2 (canReplaceReg).
3099 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3100 // Match (cond ? x : x)
3101 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3102 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3103 MRI);
3104}
3105
// Reports whether operands 1 and 2 of MI are equal according to
// matchEqualDefs and operand 0 may be replaced by operand 1 (canReplaceReg).
3107 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3108 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3109 MRI);
3110}
3111
3113 unsigned OpIdx) const {
// Reports whether operand OpIdx of MI is a register for which getOpcodeDef
// finds a G_IMPLICIT_DEF definition.
3114 MachineOperand &MO = MI.getOperand(OpIdx);
3115 return MO.isReg() &&
3116 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3117}
3118
3120 const MachineOperand &MO, bool OrNegative) const {
// Forwards to isKnownToBeAPowerOfTwo for the register held by MO, passing
// along the value-tracking object VT and the OrNegative flag.
3121 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT, OrNegative);
3122}
3123
3125 double C) const {
// Builds a floating-point constant with value C into MI's destination operand
// and erases MI. MI must have exactly one def.
3126 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3127 Builder.buildFConstant(MI.getOperand(0), C);
3128 MI.eraseFromParent();
3129}
3130
3132 int64_t C) const {
// Builds an integer constant with value C into MI's destination operand and
// erases MI. MI must have exactly one def.
3133 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3134 Builder.buildConstant(MI.getOperand(0), C);
3135 MI.eraseFromParent();
3136}
3137
// Builds a constant with value C into MI's destination operand and erases MI.
// MI must have exactly one def.
3139 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3140 Builder.buildConstant(MI.getOperand(0), C);
3141 MI.eraseFromParent();
3142}
3143
3145 ConstantFP *CFP) const {
// Builds a floating-point constant holding CFP's APFloat value into MI's
// destination operand and erases MI. MI must have exactly one def.
3146 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3147 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3148 MI.eraseFromParent();
3149}
3150
// Builds an undef value into MI's destination operand and erases MI. MI must
// have exactly one def.
3152 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3153 Builder.buildUndef(MI.getOperand(0));
3154 MI.eraseFromParent();
3155}
3156
3158 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
// Looks for an operand of MI (operands 1 and 2) that is a negation, so the
// operation can be expressed as a subtraction. On success MatchInfo holds
// (new LHS, new RHS) of that subtraction.
3159 Register LHS = MI.getOperand(1).getReg();
3160 Register RHS = MI.getOperand(2).getReg();
// NewLHS/NewRHS alias the tuple elements, so the lambda below writes the
// result straight into MatchInfo.
3161 Register &NewLHS = std::get<0>(MatchInfo);
3162 Register &NewRHS = std::get<1>(MatchInfo);
3163
3164 // Helper lambda to check for opportunities for
3165 // ((0-A) + B) -> B - A
3166 // (A + (0-B)) -> A - B
3167 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3168 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3169 return false;
3170 NewLHS = MaybeNewLHS;
3171 return true;
3172 };
3173
3174 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3175}
3176
3178 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
// Walks a chain of G_INSERT_VECTOR_ELT instructions with constant indices,
// starting at MI, and records in MatchInfo (one slot per vector element) the
// register inserted at each index. Succeeds when the chain ends in a
// G_BUILD_VECTOR or G_IMPLICIT_DEF, or when every element was overwritten.
3179 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3180 "Invalid opcode");
3181 Register DstReg = MI.getOperand(0).getReg();
3182 LLT DstTy = MRI.getType(DstReg);
3183 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3184
3185 if (DstTy.isScalableVector())
3186 return false;
3187
3188 unsigned NumElts = DstTy.getNumElements();
3189 // If this MI is part of a sequence of insert_vec_elts, then
3190 // don't do the combine in the middle of the sequence.
3191 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3192 TargetOpcode::G_INSERT_VECTOR_ELT)
3193 return false;
3194 MachineInstr *CurrInst = &MI;
3195 MachineInstr *TmpInst;
3196 int64_t IntImm;
3197 Register TmpReg;
3198 MatchInfo.resize(NumElts);
// Walk from MI towards the start of the chain. The first value recorded for
// an index is kept, so an insert nearer to MI takes precedence over earlier
// ones at the same index.
3199 while (mi_match(
3200 CurrInst->getOperand(0).getReg(), MRI,
3201 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3202 if (IntImm >= NumElts || IntImm < 0)
3203 return false;
3204 if (!MatchInfo[IntImm])
3205 MatchInfo[IntImm] = TmpReg;
3206 CurrInst = TmpInst;
3207 }
3208 // Variable index.
3209 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3210 return false;
// A G_BUILD_VECTOR at the end of the chain supplies every element that was
// not overwritten; its source I - 1 is operand I.
3211 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3212 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3213 if (!MatchInfo[I - 1].isValid())
3214 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3215 }
3216 return true;
3217 }
3218 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3219 // overwritten, bail out.
3220 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3221 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3222}
3223
3225 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
// Replaces MI with a build-vector of the registers in MatchInfo. Entries that
// are still invalid are filled with one shared undef of the destination's
// scalar type.
3226 Register UndefReg;
// Creates the undef register on first use and reuses it afterwards.
3227 auto GetUndef = [&]() {
3228 if (UndefReg)
3229 return UndefReg;
3230 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3231 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3232 return UndefReg;
3233 };
3234 for (Register &Reg : MatchInfo) {
3235 if (!Reg)
3236 Reg = GetUndef();
3237 }
3238 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3239 MI.eraseFromParent();
3240}
3241
3243 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
// Builds a subtraction of the (LHS, RHS) pair in MatchInfo into MI's
// destination register and erases MI.
3244 Register SubLHS, SubRHS;
3245 std::tie(SubLHS, SubRHS) = MatchInfo;
3246 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3247 MI.eraseFromParent();
3248}
3249
3250bool CombinerHelper::matchBinopWithNegInner(Register MInner, Register Other,
3251 unsigned RootOpc, Register Dst,
3252 LLT Ty,
3253 BuildFnTy &MatchInfo) const {
3254 /// Helper function for matchBinopWithNeg: tries to match one commuted form
3255 /// of `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`.
3256 MachineInstr *InnerDef = MRI.getVRegDef(MInner);
3257 if (!InnerDef)
3258 return false;
3259
3260 unsigned InnerOpc = InnerDef->getOpcode();
3261 if (InnerOpc != TargetOpcode::G_ADD && InnerOpc != TargetOpcode::G_SUB)
3262 return false;
3263
3264 if (!MRI.hasOneNonDBGUse(MInner))
3265 return false;
3266
3267 Register InnerLHS = InnerDef->getOperand(1).getReg();
3268 Register InnerRHS = InnerDef->getOperand(2).getReg();
3269 Register NotSrc;
3270 Register B, C;
3271
3272 // Check if either operand is ~b
3273 auto TryMatch = [&](Register MaybeNot, Register Other) {
3274 if (mi_match(MaybeNot, MRI, m_Not(m_Reg(NotSrc)))) {
3275 if (!MRI.hasOneNonDBGUse(MaybeNot))
3276 return false;
3277 B = NotSrc;
3278 C = Other;
3279 return true;
3280 }
3281 return false;
3282 };
3283
3284 if (!TryMatch(InnerLHS, InnerRHS) && !TryMatch(InnerRHS, InnerLHS))
3285 return false;
3286
3287 // Flip add/sub
3288 unsigned FlippedOpc = (InnerOpc == TargetOpcode::G_ADD) ? TargetOpcode::G_SUB
3289 : TargetOpcode::G_ADD;
3290
3291 Register A = Other;
3292 MatchInfo = [=](MachineIRBuilder &Builder) {
3293 auto NewInner = Builder.buildInstr(FlippedOpc, {Ty}, {B, C});
3294 auto NewNot = Builder.buildNot(Ty, NewInner);
3295 Builder.buildInstr(RootOpc, {Dst}, {A, NewNot});
3296 };
3297 return true;
3298}
3299
3301 BuildFnTy &MatchInfo) const {
3302 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
3303 // Root MI is one of G_AND, G_OR, G_XOR.
3304 // We also look for commuted forms of operations. Pattern shouldn't apply
3305 // if the inner operation has multiple uses.
3306
3307 unsigned RootOpc = MI.getOpcode();
3308 Register Dst = MI.getOperand(0).getReg();
3309 LLT Ty = MRI.getType(Dst);
3310
3311 Register LHS = MI.getOperand(1).getReg();
3312 Register RHS = MI.getOperand(2).getReg();
3313 // Check the commuted and uncommuted forms of the operation.
// The first argument is the candidate inner add/sub, the second is `a`.
3314 return matchBinopWithNegInner(LHS, RHS, RootOpc, Dst, Ty, MatchInfo) ||
3315 matchBinopWithNegInner(RHS, LHS, RootOpc, Dst, Ty, MatchInfo);
3316}
3317
3319 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3320 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3321 //
3322 // Creates the new hand + logic instruction (but does not insert them.)
3323 //
3324 // On success, MatchInfo is populated with the new instructions. These are
3325 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3326 unsigned LogicOpcode = MI.getOpcode();
3327 assert(LogicOpcode == TargetOpcode::G_AND ||
3328 LogicOpcode == TargetOpcode::G_OR ||
3329 LogicOpcode == TargetOpcode::G_XOR);
3330 MachineIRBuilder MIB(MI);
3331 Register Dst = MI.getOperand(0).getReg();
3332 Register LHSReg = MI.getOperand(1).getReg();
3333 Register RHSReg = MI.getOperand(2).getReg();
3334
3335 // Don't recompute anything.
3336 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3337 return false;
3338
3339 // Make sure we have (hand x, ...), (hand y, ...)
3340 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3341 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3342 if (!LeftHandInst || !RightHandInst)
3343 return false;
3344 unsigned HandOpcode = LeftHandInst->getOpcode();
3345 if (HandOpcode != RightHandInst->getOpcode())
3346 return false;
// Both hand instructions need a register in operand 1; it is read as X / Y
// below.
3347 if (LeftHandInst->getNumOperands() < 2 ||
3348 !LeftHandInst->getOperand(1).isReg() ||
3349 RightHandInst->getNumOperands() < 2 ||
3350 !RightHandInst->getOperand(1).isReg())
3351 return false;
3352
3353 // Make sure the types match up, and if we're doing this post-legalization,
3354 // we end up with legal types.
3355 Register X = LeftHandInst->getOperand(1).getReg();
3356 Register Y = RightHandInst->getOperand(1).getReg();
3357 LLT XTy = MRI.getType(X);
3358 LLT YTy = MRI.getType(Y);
3359 if (!XTy.isValid() || XTy != YTy)
3360 return false;
3361
3362 // Optional extra source register.
3363 Register ExtraHandOpSrcReg;
// Only the hand opcodes listed below are handled; anything else is rejected.
3364 switch (HandOpcode) {
3365 default:
3366 return false;
3367 case TargetOpcode::G_ANYEXT:
3368 case TargetOpcode::G_SEXT:
3369 case TargetOpcode::G_ZEXT: {
3370 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3371 break;
3372 }
3373 case TargetOpcode::G_TRUNC: {
3374 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3375 const MachineFunction *MF = MI.getMF();
3376 LLVMContext &Ctx = MF->getFunction().getContext();
3377
3378 LLT DstTy = MRI.getType(Dst);
3379 const TargetLowering &TLI = getTargetLowering();
3380
3381 // Be extra careful sinking truncate. If it's free, there's no benefit in
3382 // widening a binop.
3383 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3384 return false;
3385 break;
3386 }
3387 case TargetOpcode::G_AND:
3388 case TargetOpcode::G_ASHR:
3389 case TargetOpcode::G_LSHR:
3390 case TargetOpcode::G_SHL: {
3391 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
// The second source of both hands must be the same value (matchEqualDefs).
3392 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3393 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3394 return false;
3395 ExtraHandOpSrcReg = ZOp.getReg();
3396 break;
3397 }
3398 }
3399
3400 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3401 return false;
3402
3403 // Record the steps to build the new instructions.
3404 //
3405 // Steps to build (logic x, y)
3406 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3407 OperandBuildSteps LogicBuildSteps = {
3408 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3409 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3410 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3411 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3412
3413 // Steps to build hand (logic x, y), ...z
3414 OperandBuildSteps HandBuildSteps = {
3415 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3416 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3417 if (ExtraHandOpSrcReg.isValid())
3418 HandBuildSteps.push_back(
3419 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3420 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3421
// The logic instruction is listed first so it is built before the hand
// instruction that consumes NewLogicDst.
3422 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3423 return true;
3424}
3425
3427 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
// Builds every instruction described in MatchInfo.InstrsToBuild, in order,
// then erases MI. Each entry supplies an opcode and a list of callbacks that
// add that instruction's operands.
3428 assert(MatchInfo.InstrsToBuild.size() &&
3429 "Expected at least one instr to build?");
3430 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3431 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3432 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3433 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3434 for (auto &OperandFn : InstrToBuild.OperandFns)
3435 OperandFn(Instr);
3436 }
3437 MI.eraseFromParent();
3438}
3439
3441 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
// Matches a G_ASHR of a G_SHL where both shift amounts are the same constant
// (or constant splat). On success MatchInfo holds (shifted source register,
// shift amount).
3442 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3443 int64_t ShlCst, AshrCst;
3444 Register Src;
3445 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3446 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3447 m_ICstOrSplat(AshrCst))))
3448 return false;
3449 if (ShlCst != AshrCst)
3450 return false;
// NOTE(review): the opening line of the following check is not present in
// this listing; it appears to test G_SEXT_INREG on the source type.
3452 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3453 return false;
3454 MatchInfo = std::make_tuple(Src, ShlCst);
3455 return true;
3456}
3457
3459 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
// Replaces the G_ASHR with a sign-extend-in-register of the source from
// MatchInfo and erases MI.
3460 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3461 Register Src;
3462 int64_t ShiftAmt;
3463 std::tie(Src, ShiftAmt) = MatchInfo;
3464 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
// The width passed to buildSExtInReg is the scalar size minus the shift
// amount.
3465 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3466 MI.eraseFromParent();
3467}
3468
3469/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3472 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Matches a G_AND whose first operand is another G_AND, both with constant
// masks, and records in MatchInfo a builder that emits either a single AND
// with the combined mask or, if the masks share no bits, the constant zero.
3473 assert(MI.getOpcode() == TargetOpcode::G_AND);
3474
3475 Register Dst = MI.getOperand(0).getReg();
3476 LLT Ty = MRI.getType(Dst);
3477
3478 Register R;
3479 int64_t C1;
3480 int64_t C2;
3481 if (!mi_match(
3482 Dst, MRI,
3483 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3484 return false;
3485
// The lambda captures by copy, so Dst, Ty, R, C1 and C2 remain valid when it
// runs later.
3486 MatchInfo = [=](MachineIRBuilder &B) {
3487 if (C1 & C2) {
3488 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3489 return;
3490 }
// Disjoint masks: replace Dst with a freshly built zero constant.
3491 auto Zero = B.buildConstant(Ty, 0);
3492 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3493 };
3494 return true;
3495}
3496
3498 Register &Replacement) const {
3499 // Given
3500 //
3501 // %y:_(sN) = G_SOMETHING
3502 // %x:_(sN) = G_SOMETHING
3503 // %res:_(sN) = G_AND %x, %y
3504 //
3505 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3506 //
3507 // Patterns like this can appear as a result of legalization. E.g.
3508 //
3509 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3510 // %one:_(s32) = G_CONSTANT i32 1
3511 // %and:_(s32) = G_AND %cmp, %one
3512 //
3513 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3514 assert(MI.getOpcode() == TargetOpcode::G_AND);
// Without value tracking no known bits are available, so nothing can match.
3515 if (!VT)
3516 return false;
3517
3518 Register AndDst = MI.getOperand(0).getReg();
3519 Register LHS = MI.getOperand(1).getReg();
3520 Register RHS = MI.getOperand(2).getReg();
3521
3522 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3523 // we can't do anything. If we do, then it depends on whether we have
3524 // KnownBits on the LHS.
3525 KnownBits RHSBits = VT->getKnownBits(RHS);
3526 if (RHSBits.isUnknown())
3527 return false;
3528
3529 KnownBits LHSBits = VT->getKnownBits(LHS);
3530
3531 // Check that x & Mask == x.
3532 // x & 1 == x, always
3533 // x & 0 == x, only if x is also 0
3534 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3535 //
3536 // Check if we can replace AndDst with the LHS of the G_AND
3537 if (canReplaceReg(AndDst, LHS, MRI) &&
3538 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3539 Replacement = LHS;
3540 return true;
3541 }
3542
3543 // Check if we can replace AndDst with the RHS of the G_AND
// Same test with the roles swapped: LHS acts as the mask for RHS.
3544 if (canReplaceReg(AndDst, RHS, MRI) &&
3545 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3546 Replacement = RHS;
3547 return true;
3548 }
3549
3550 return false;
3551}
3552
3554 Register &Replacement) const {
3555 // Given
3556 //
3557 // %y:_(sN) = G_SOMETHING
3558 // %x:_(sN) = G_SOMETHING
3559 // %res:_(sN) = G_OR %x, %y
3560 //
3561 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3562 assert(MI.getOpcode() == TargetOpcode::G_OR);
// Without value tracking no known bits are available, so nothing can match.
3563 if (!VT)
3564 return false;
3565
3566 Register OrDst = MI.getOperand(0).getReg();
3567 Register LHS = MI.getOperand(1).getReg();
3568 Register RHS = MI.getOperand(2).getReg();
3569
3570 KnownBits LHSBits = VT->getKnownBits(LHS);
3571 KnownBits RHSBits = VT->getKnownBits(RHS);
3572
3573 // Check that x | Mask == x.
3574 // x | 0 == x, always
3575 // x | 1 == x, only if x is also 1
3576 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3577 //
3578 // Check if we can replace OrDst with the LHS of the G_OR
3579 if (canReplaceReg(OrDst, LHS, MRI) &&
3580 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3581 Replacement = LHS;
3582 return true;
3583 }
3584
3585 // Check if we can replace OrDst with the RHS of the G_OR
// Same test with the roles swapped: LHS acts as the mask for RHS.
3586 if (canReplaceReg(OrDst, RHS, MRI) &&
3587 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3588 Replacement = RHS;
3589 return true;
3590 }
3591
3592 return false;
3593}
3594
// Reports whether the source (operand 1) already has enough sign bits for the
// extension width given as an immediate in operand 2.
3596 // If the input is already sign extended, just drop the extension.
3597 Register Src = MI.getOperand(1).getReg();
3598 unsigned ExtBits = MI.getOperand(2).getImm();
3599 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
// Extending from ExtBits is a no-op when at least TypeSize - ExtBits + 1 of
// the top bits are already copies of the sign bit.
3600 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3601}
3602
3603static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3604 int64_t Cst, bool IsVector, bool IsFP) {
3605 // For i1, Cst will always be -1 regardless of boolean contents.
3606 return (ScalarSizeBits == 1 && Cst == -1) ||
3607 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3608}
3609
3610// This pattern aims to match the following shape to avoid extra mov
3611// instructions
3612// G_BUILD_VECTOR(
3613// G_UNMERGE_VALUES(src, 0)
3614// G_UNMERGE_VALUES(src, 1)
3615// G_IMPLICIT_DEF
3616// G_IMPLICIT_DEF
3617// )
3618// ->
3619// G_CONCAT_VECTORS(
3620// src,
3621// undef
3622// )
3625 Register &UnmergeSrc) const {
// Matches a build-vector whose first half consists of all results of one
// G_UNMERGE_VALUES (in order) and whose second half consists of
// G_IMPLICIT_DEFs. On success UnmergeSrc is the unmerge's vector source.
// UnmergeSrc may be written even when the match later fails.
3626 auto &BV = cast<GBuildVector>(MI);
3627
3628 unsigned BuildUseCount = BV.getNumSources();
3629 if (BuildUseCount % 2 != 0)
3630 return false;
3631
3632 unsigned NumUnmerge = BuildUseCount / 2;
3633
3634 auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
3635
3636 // Check the first operand is an unmerge and has the correct number of
3637 // operands
3638 if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
3639 return false;
3640
3641 UnmergeSrc = Unmerge->getSourceReg();
3642
3643 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3644 LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
3645
3646 if (!UnmergeSrcTy.isVector())
3647 return false;
3648
3649 // Ensure we only generate legal instructions post-legalizer
3650 if (!IsPreLegalize &&
3651 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
3652 return false;
3653
3654 // Check that all of the operands before the midpoint come from the same
3655 // unmerge and are in the same order as they are used in the build_vector
3656 for (unsigned I = 0; I < NumUnmerge; ++I) {
3657 auto MaybeUnmergeReg = BV.getSourceReg(I);
3658 auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
3659
3660 if (!LoopUnmerge || LoopUnmerge != Unmerge)
3661 return false;
3662
3663 if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
3664 return false;
3665 }
3666
3667 // Check that all of the unmerged values are used
// NOTE(review): this repeats the getNumDefs() comparison already made right
// after Unmerge was looked up, so it cannot fail here.
3668 if (Unmerge->getNumDefs() != NumUnmerge)
3669 return false;
3670
3671 // Check that all of the operands after the mid point are undefs.
3672 for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
// NOTE(review): Undef is dereferenced without a null check -- confirm
// getDefIgnoringCopies cannot return null for these sources.
3673 auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
3674
3675 if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
3676 return false;
3677 }
3678
3679 return true;
3680}
3681
3685 Register &UnmergeSrc) const {
// Replaces MI with a concatenation of UnmergeSrc and an undef vector of the
// same type, then erases MI.
3686 assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
3687 B.setInstrAndDebugLoc(MI);
3688
3689 Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
3690 B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
3691
3692 MI.eraseFromParent();
3693}
3694
3695// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3696// using vector truncates instead
3697//
3698// EXAMPLE:
3699// %a(s32), %b(s32) = G_UNMERGE_VALUES %src(<2 x s32>)
3700// %T_a(s16) = G_TRUNC %a(s32)
3701// %T_b(s16) = G_TRUNC %b(s32)
3702// %Undef(s16) = G_IMPLICIT_DEF
3703// %dst(<4 x s16>) = G_BUILD_VECTOR %T_a(s16), %T_b(s16), %Undef(s16), %Undef(s16)
3704//
3705// ===>
3706// %Undef(<2 x s32>) = G_IMPLICIT_DEF
3707// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x s32>), %Undef(<2 x s32>)
3708// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3709//
3710// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3712 Register &MatchInfo) const {
// Matches a build-vector whose leading sources are G_TRUNCs of consecutive
// results of one G_UNMERGE_VALUES and whose remaining sources are
// G_IMPLICIT_DEFs. On success MatchInfo is the unmerge's source register.
// MatchInfo may be written even when the match later fails.
3713 auto BuildMI = cast<GBuildVector>(&MI);
3714 unsigned NumOperands = BuildMI->getNumSources();
3715 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3716
3717 // Check the G_BUILD_VECTOR sources
3718 unsigned I;
3719 MachineInstr *UnmergeMI = nullptr;
3720
3721 // Check all source TRUNCs come from the same UNMERGE instruction
3722 // and that the element order matches (BUILD_VECTOR position I
3723 // corresponds to UNMERGE result I)
3724 for (I = 0; I < NumOperands; ++I) {
3725 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3726 auto SrcMIOpc = SrcMI->getOpcode();
3727
3728 // Check if the G_TRUNC instructions all come from the same MI
3729 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3730 Register TruncSrcReg = SrcMI->getOperand(1).getReg();
3731 if (!UnmergeMI) {
3732 UnmergeMI = MRI.getVRegDef(TruncSrcReg);
3733 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3734 return false;
3735 } else {
3736 auto UnmergeSrcMI = MRI.getVRegDef(TruncSrcReg);
3737 if (UnmergeMI != UnmergeSrcMI)
3738 return false;
3739 }
3740 // Verify element ordering: BUILD_VECTOR position I must use
3741 // UNMERGE result I, otherwise the fold would lose element reordering
3742 if (UnmergeMI->getOperand(I).getReg() != TruncSrcReg)
3743 return false;
3744 } else {
3745 break;
3746 }
3747 }
// Require at least two leading truncates; this also guarantees UnmergeMI is
// set before it is used below.
3748 if (I < 2)
3749 return false;
3750
3751 // Check the remaining source elements are only G_IMPLICIT_DEF
3752 for (; I < NumOperands; ++I) {
3753 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3754 auto SrcMIOpc = SrcMI->getOpcode();
3755
3756 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3757 return false;
3758 }
3759
3760 // Check the size of unmerge source
3761 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3762 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
// NOTE(review): getNumElements() is called before anything establishes that
// the unmerge source is a vector -- confirm scalar sources cannot reach here.
3763 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3764 return false;
3765
3766 // Check the unmerge source and destination element types match
3767 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3768 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3769 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3770 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3771 return false;
3772
3773 // Only generate legal instructions post-legalizer
3774 if (!IsPreLegalize) {
3775 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3776
// The concat is only emitted (and so only needs to be legal) when the
// element counts differ.
3777 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3778 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3779 return false;
3780
3781 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3782 return false;
3783 }
3784
3785 return true;
3786}
3787
3789 Register &MatchInfo) const {
// Replaces the build-vector MI with a vector truncate of MatchInfo. When the
// destination has more elements than MatchInfo's type, MatchInfo is first
// padded with undef vectors via a concat. MI is erased afterwards.
3790 Register MidReg;
3791 auto BuildMI = cast<GBuildVector>(&MI);
3792 Register DstReg = BuildMI->getReg(0);
3793 LLT DstTy = MRI.getType(DstReg);
3794 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3795 unsigned DstTyNumElt = DstTy.getNumElements();
3796 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3797
3798 // No need to pad vector if only G_TRUNC is needed
3799 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3800 MidReg = MatchInfo;
3801 } else {
// Pad with one shared undef vector per missing chunk of the source's size.
3802 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3803 SmallVector<Register> ConcatRegs = {MatchInfo};
3804 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3805 ConcatRegs.push_back(UndefReg);
3806
3807 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3808 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3809 }
3810
3811 Builder.buildTrunc(DstReg, MidReg);
3812 MI.eraseFromParent();
3813}
3814
/// Match step for a G_XOR \p MI of the form xor(src, true), where src is the
/// root of a tree of single-use G_AND / G_OR nodes whose leaves are either all
/// G_ICMP or all G_FCMP. On a match, \p RegsToNegate holds every register of
/// that tree, root first.
/// NOTE(review): the first line of this definition's signature is missing from
/// this listing.
3816 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3817 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3818 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3819 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3820 Register XorSrc;
3821 Register CstReg;
3822 // We match xor(src, true) here.
3823 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3824 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3825 return false;
3826
3827 if (!MRI.hasOneNonDBGUse(XorSrc))
3828 return false;
3829
3830 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3831 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3832 // list of tree nodes to visit.
3833 RegsToNegate.push_back(XorSrc);
3834 // Remember whether the comparisons are all integer or all floating point.
3835 bool IsInt = false;
3836 bool IsFP = false;
// RegsToNegate grows while it is being walked, hence the index-based loop.
3837 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3838 Register Reg = RegsToNegate[I];
3839 if (!MRI.hasOneNonDBGUse(Reg))
3840 return false;
3841 MachineInstr *Def = MRI.getVRegDef(Reg);
3842 switch (Def->getOpcode()) {
3843 default:
3844 // Don't match if the tree contains anything other than ANDs, ORs and
3845 // comparisons.
3846 return false;
3847 case TargetOpcode::G_ICMP:
3848 if (IsFP)
3849 return false;
3850 IsInt = true;
3851 // When we apply the combine we will invert the predicate.
3852 break;
3853 case TargetOpcode::G_FCMP:
3854 if (IsInt)
3855 return false;
3856 IsFP = true;
3857 // When we apply the combine we will invert the predicate.
3858 break;
3859 case TargetOpcode::G_AND:
3860 case TargetOpcode::G_OR:
3861 // Implement De Morgan's laws:
3862 // ~(x & y) -> ~x | ~y
3863 // ~(x | y) -> ~x & ~y
3864 // When we apply the combine we will change the opcode and recursively
3865 // negate the operands.
3866 RegsToNegate.push_back(Def->getOperand(1).getReg());
3867 RegsToNegate.push_back(Def->getOperand(2).getReg());
3868 break;
3869 }
3870 }
3871
3872 // Now we know whether the comparisons are integer or floating point, check
3873 // the constant in the xor.
3874 int64_t Cst;
3875 if (Ty.isVector()) {
// Vector case: the xor constant must be a splat, validated per scalar lane.
3876 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3877 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3878 if (!MaybeCst)
3879 return false;
3880 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3881 return false;
3882 } else {
3883 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3884 return false;
3885 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3886 return false;
3887 }
3888
3889 return true;
3890}
3891
/// Apply step: rewrites in place the defining instruction of every register in
/// \p RegsToNegate (comparisons get a new predicate, G_AND and G_OR swap
/// opcodes), then replaces the result of \p MI with its first source operand
/// and erases \p MI.
/// NOTE(review): the first line of this definition's signature is missing from
/// this listing.
3893 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3894 for (Register Reg : RegsToNegate) {
3895 MachineInstr *Def = MRI.getVRegDef(Reg);
3896 Observer.changingInstr(*Def);
3897 // For each comparison, invert the opcode. For each AND and OR, change the
3898 // opcode.
3899 switch (Def->getOpcode()) {
3900 default:
3901 llvm_unreachable("Unexpected opcode");
3902 case TargetOpcode::G_ICMP:
3903 case TargetOpcode::G_FCMP: {
3904 MachineOperand &PredOp = Def->getOperand(1);
// NOTE(review): the declaration of NewP (original lines 3905-3906) is missing
// from this listing; presumably it is the inverse of PredOp's predicate.
3907 PredOp.setPredicate(NewP);
3908 break;
3909 }
3910 case TargetOpcode::G_AND:
3911 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3912 break;
3913 case TargetOpcode::G_OR:
3914 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3915 break;
3916 }
3917 Observer.changedInstr(*Def);
3918 }
3919
// The tree now computes the negated value itself, so the xor is redundant.
3920 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3921 MI.eraseFromParent();
3922}
3923
/// Match step for a G_XOR \p MI. On a match, \p MatchInfo.first is x and
/// \p MatchInfo.second is y, where y is the register shared by the G_AND and
/// the G_XOR.
/// NOTE(review): the first line of this definition's signature is missing from
/// this listing.
3925 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3926 // Match (xor (and x, y), y) (or any of its commuted cases)
3927 assert(MI.getOpcode() == TargetOpcode::G_XOR);
// X and Y alias MatchInfo, so the matchers below write straight into it.
3928 Register &X = MatchInfo.first;
3929 Register &Y = MatchInfo.second;
3930 Register AndReg = MI.getOperand(1).getReg();
3931 Register SharedReg = MI.getOperand(2).getReg();
3932
3933 // Find a G_AND on either side of the G_XOR.
3934 // Look for one of
3935 //
3936 // (xor (and x, y), SharedReg)
3937 // (xor SharedReg, (and x, y))
3938 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3939 std::swap(AndReg, SharedReg);
3940 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3941 return false;
3942 }
3943
3944 // Only do this if we'll eliminate the G_AND.
3945 if (!MRI.hasOneNonDBGUse(AndReg))
3946 return false;
3947
3948 // We can combine if SharedReg is the same as either the LHS or RHS of the
3949 // G_AND.
// Normalize so that the shared register, if any, ends up in Y.
3950 if (Y != SharedReg)
3951 std::swap(X, Y);
3952 return Y == SharedReg;
3953}
3954
/// Apply step: builds not(x) and mutates \p MI in place into a G_AND of that
/// value and y, where (x, y) is \p MatchInfo.
/// NOTE(review): the first line of this definition's signature is missing from
/// this listing.
3956 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3957 // Fold (xor (and x, y), y) -> (and (not x), y)
3958 Register X, Y;
3959 std::tie(X, Y) = MatchInfo;
3960 auto Not = Builder.buildNot(MRI.getType(X), X);
// MI is reused rather than replaced: only its opcode and sources change.
3961 Observer.changingInstr(MI);
3962 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3963 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3964 MI.getOperand(2).setReg(Y);
3965 Observer.changedInstr(MI);
3966}
3967
/// Match step for a pointer add whose base is zero: a constant 0 for a scalar
/// pointer result, or an all-zeros build vector for a vector result. Never
/// matches when the result's address space is non-integral.
/// NOTE(review): the signature line of this definition is missing from this
/// listing.
3969 auto &PtrAdd = cast<GPtrAdd>(MI);
3970 Register DstReg = PtrAdd.getReg(0);
3971 LLT Ty = MRI.getType(DstReg);
3972 const DataLayout &DL = Builder.getMF().getDataLayout();
3973
3974 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3975 return false;
3976
3977 if (Ty.isPointer()) {
3978 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3979 return ConstVal && *ConstVal == 0;
3980 }
3981
3982 assert(Ty.isVector() && "Expecting a vector type");
3983 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3984 return isBuildVectorAllZeros(*VecMI, MRI);
3985}
3986
/// Apply step: replaces the pointer add \p MI with an int-to-pointer
/// conversion of its offset operand, writing to the same destination register.
/// NOTE(review): the signature line of this definition is missing from this
/// listing.
3988 auto &PtrAdd = cast<GPtrAdd>(MI);
3989 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3990 PtrAdd.eraseFromParent();
3991}
3992
3993 /// The second source operand is known to be a power of 2.
/// Replaces \p MI with an AND of its first source and (second source - 1).
/// NOTE(review): the signature line of this definition is missing from this
/// listing.
3995 Register DstReg = MI.getOperand(0).getReg();
3996 Register Src0 = MI.getOperand(1).getReg();
3997 Register Pow2Src1 = MI.getOperand(2).getReg();
3998 LLT Ty = MRI.getType(DstReg);
3999
4000 // Fold (urem x, pow2) -> (and x, pow2-1)
// pow2-1 is formed by adding -1 to the pow2 register.
4001 auto NegOne = Builder.buildConstant(Ty, -1);
4002 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
4003 Builder.buildAnd(DstReg, Src0, Add);
4004 MI.eraseFromParent();
4005}
4006
/// Match step for a binary operator \p MI that has a single-use G_SELECT of
/// constants as one of its two source operands. On a match, \p SelectOpNo is
/// the operand index (1 or 2) of that select. The other operand must itself be
/// a constant, unless \p MI is a G_AND / G_OR and both select arms are zero or
/// all-ones.
/// NOTE(review): the first line of this definition's signature is missing from
/// this listing.
4008 unsigned &SelectOpNo) const {
4009 Register LHS = MI.getOperand(1).getReg();
4010 Register RHS = MI.getOperand(2).getReg();
4011
// Try the LHS as the select first; fall back to the RHS below.
4012 Register OtherOperandReg = RHS;
4013 SelectOpNo = 1;
4014 MachineInstr *Select = MRI.getVRegDef(LHS);
4015
4016 // Don't do this unless the old select is going away. We want to eliminate the
4017 // binary operator, not replace a binop with a select.
4018 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
4019 !MRI.hasOneNonDBGUse(LHS)) {
4020 OtherOperandReg = LHS;
4021 SelectOpNo = 2;
4022 Select = MRI.getVRegDef(RHS);
4023 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
4024 !MRI.hasOneNonDBGUse(RHS))
4025 return false;
4026 }
4027
// Operands 2 and 3 of the select are its true and false values.
4028 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
4029 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
4030
4031 if (!isConstantOrConstantVector(*SelectLHS, MRI,
4032 /*AllowFP*/ true,
4033 /*AllowOpaqueConstants*/ false))
4034 return false;
4035 if (!isConstantOrConstantVector(*SelectRHS, MRI,
4036 /*AllowFP*/ true,
4037 /*AllowOpaqueConstants*/ false))
4038 return false;
4039
4040 unsigned BinOpcode = MI.getOpcode();
4041
4042 // We know that one of the operands is a select of constants. Now verify that
4043 // the other binary operator operand is either a constant, or we can handle a
4044 // variable.
4045 bool CanFoldNonConst =
4046 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
4047 (isNullOrNullSplat(*SelectLHS, MRI) ||
4048 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
4049 (isNullOrNullSplat(*SelectRHS, MRI) ||
4050 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
4051 if (CanFoldNonConst)
4052 return true;
4053
4054 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
4055 /*AllowFP*/ true,
4056 /*AllowOpaqueConstants*/ false);
4057}
4058
4059 /// \p SelectOperand is the operand in binary operator \p MI that is the select
4060 /// to fold.
/// Builds the binary operator once per select arm (keeping the arm on the same
/// side it had in \p MI), then replaces \p MI with a select between the two
/// results that reuses the original condition.
/// NOTE(review): the first line of this definition's signature is missing from
/// this listing.
4062 MachineInstr &MI, const unsigned &SelectOperand) const {
4063 Register Dst = MI.getOperand(0).getReg();
4064 Register LHS = MI.getOperand(1).getReg();
4065 Register RHS = MI.getOperand(2).getReg();
4066 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
4067
4068 Register SelectCond = Select->getOperand(1).getReg();
4069 Register SelectTrue = Select->getOperand(2).getReg();
4070 Register SelectFalse = Select->getOperand(3).getReg();
4071
4072 LLT Ty = MRI.getType(Dst);
4073 unsigned BinOpcode = MI.getOpcode();
4074
4075 Register FoldTrue, FoldFalse;
4076
4077 // We have a select-of-constants followed by a binary operator with a
4078 // constant. Eliminate the binop by pulling the constant math into the select.
4079 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
4080 if (SelectOperand == 1) {
4081 // TODO: SelectionDAG verifies this actually constant folds before
4082 // committing to the combine.
4083
4084 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
4085 FoldFalse =
4086 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
4087 } else {
// The select was the RHS: keep operand order for non-commutative opcodes.
4088 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
4089 FoldFalse =
4090 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
4091 }
4092
// The new select is built with the flags of the original binary operator.
4093 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
4094 MI.eraseFromParent();
4095}
4096
/// Walks the tree of single-use G_ORs rooted at \p Root and collects every
/// non-G_OR operand register.
///
/// \returns The collected registers, or std::nullopt if an operand has more
/// than one non-debug use or the number of collected registers is zero or odd.
4097 std::optional<SmallVector<Register, 8>>
4098 CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
4099 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
4100 // We want to detect if Root is part of a tree which represents a bunch
4101 // of loads being merged into a larger load. We'll try to recognize patterns
4102 // like, for example:
4103 //
4104 // Reg Reg
4105 // \ /
4106 // OR_1 Reg
4107 // \ /
4108 // OR_2
4109 // \ Reg
4110 // .. /
4111 // Root
4112 //
4113 // Reg Reg Reg Reg
4114 // \ / \ /
4115 // OR_1 OR_2
4116 // \ /
4117 // \ /
4118 // ...
4119 // Root
4120 //
4121 // Each "Reg" may have been produced by a load + some arithmetic. This
4122 // function will save each of them.
4123 SmallVector<Register, 8> RegsToVisit;
// NOTE(review): the declaration of the work list `Ors` (original line 4124)
// is missing from this listing; presumably it starts out holding Root.
4125
4126 // In the "worst" case, we're dealing with a load for each byte. So, there
4127 // are at most #bytes - 1 ORs.
4128 const unsigned MaxIter =
4129 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
4130 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
4131 if (Ors.empty())
4132 break;
4133 const MachineInstr *Curr = Ors.pop_back_val();
4134 Register OrLHS = Curr->getOperand(1).getReg();
4135 Register OrRHS = Curr->getOperand(2).getReg();
4136
4137 // In the combine, we want to eliminate the entire tree.
4138 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
4139 return std::nullopt;
4140
4141 // If it's a G_OR, save it and continue to walk. If it's not, then it's
4142 // something that may be a load + arithmetic.
4143 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
4144 Ors.push_back(Or);
4145 else
4146 RegsToVisit.push_back(OrLHS);
4147 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
4148 Ors.push_back(Or);
4149 else
4150 RegsToVisit.push_back(OrRHS);
4151 }
4152
4153 // We're going to try and merge each register into a wider power-of-2 type,
4154 // so we ought to have an even number of registers.
4155 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
4156 return std::nullopt;
4157 return RegsToVisit;
4158}
4159
4160 /// Helper function for findLoadOffsetsForLoadOrCombine.
4161 ///
4162 /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
4163 /// and then moving that value into a specific byte offset.
4164 ///
4165 /// e.g. x[i] << 24
4166 ///
4167 /// \returns The load instruction and the byte offset it is moved into.
/// The returned offset is the shift amount divided by \p MemSizeInBits, i.e. it
/// counts \p MemSizeInBits wide elements. std::nullopt is returned when the
/// shift is not a multiple of \p MemSizeInBits, or when the source is not an
/// unordered GZExtLoad of exactly \p MemSizeInBits bits.
4168 static std::optional<std::pair<GZExtLoad *, int64_t>>
4169 matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
4170 const MachineRegisterInfo &MRI) {
4171 assert(MRI.hasOneNonDBGUse(Reg) &&
4172 "Expected Reg to only have one non-debug use?");
4173 Register MaybeLoad;
4174 int64_t Shift;
// An unshifted value is treated as a shift by zero.
4175 if (!mi_match(Reg, MRI,
4176 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
4177 Shift = 0;
4178 MaybeLoad = Reg;
4179 }
4180
4181 if (Shift % MemSizeInBits != 0)
4182 return std::nullopt;
4183
4184 // TODO: Handle other types of loads.
4185 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
4186 if (!Load)
4187 return std::nullopt;
4188
4189 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4190 return std::nullopt;
4191
4192 return std::make_pair(Load, Shift / MemSizeInBits);
4193}
4194
/// For each register in \p RegsToVisit, finds the \p MemSizeInBits wide load
/// feeding it and records in MemOffset2Idx which load index ends up at which
/// position of the combined value.
///
/// \returns A tuple of (load using the lowest index, that lowest index, latest
/// load in instruction order), or std::nullopt if any check below fails.
4195 std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4196 CombinerHelper::findLoadOffsetsForLoadOrCombine(
// NOTE(review): the first parameter (original line 4197) is missing from this
// listing; the body uses it as the map `MemOffset2Idx`.
4198 const SmallVector<Register, 8> &RegsToVisit,
4199 const unsigned MemSizeInBits) const {
4200
4201 // Each load found for the pattern. There should be one for each RegsToVisit.
4202 SmallSetVector<const MachineInstr *, 8> Loads;
4203
4204 // The lowest index used in any load. (The lowest "i" for each x[i].)
4205 int64_t LowestIdx = INT64_MAX;
4206
4207 // The load which uses the lowest index.
4208 GZExtLoad *LowestIdxLoad = nullptr;
4209
4210 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4211 SmallSet<int64_t, 8> SeenIdx;
4212
4213 // Ensure each load is in the same MBB.
4214 // TODO: Support multiple MachineBasicBlocks.
4215 MachineBasicBlock *MBB = nullptr;
4216 const MachineMemOperand *MMO = nullptr;
4217
4218 // Earliest instruction-order load in the pattern.
4219 GZExtLoad *EarliestLoad = nullptr;
4220
4221 // Latest instruction-order load in the pattern.
4222 GZExtLoad *LatestLoad = nullptr;
4223
4224 // Base pointer which every load should share.
// NOTE(review): the declaration of `BasePtr` (original line 4225) is missing
// from this listing.
4226
4227 // We want to find a load for each register. Each load should have some
4228 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4229 // track of the load which uses the lowest index. Later, we will check if we
4230 // can use its pointer in the final, combined load.
4231 for (auto Reg : RegsToVisit) {
4232 // Find the load, and find the position that it will end up in (e.g. a
4233 // shifted) value.
4234 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4235 if (!LoadAndPos)
4236 return std::nullopt;
4237 GZExtLoad *Load;
4238 int64_t DstPos;
4239 std::tie(Load, DstPos) = *LoadAndPos;
4240
4241 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4242 // it is difficult to check for stores/calls/etc between loads.
4243 MachineBasicBlock *LoadMBB = Load->getParent();
4244 if (!MBB)
4245 MBB = LoadMBB;
4246 if (LoadMBB != MBB)
4247 return std::nullopt;
4248
4249 // Make sure that the MachineMemOperands of every seen load are compatible.
// Only the address space is compared, against the first load seen.
4250 auto &LoadMMO = Load->getMMO();
4251 if (!MMO)
4252 MMO = &LoadMMO;
4253 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4254 return std::nullopt;
4255
4256 // Find out what the base pointer and index for the load is.
// A pointer that is not a ptr_add of a constant is treated as index 0.
4257 Register LoadPtr;
4258 int64_t Idx;
4259 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4260 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4261 LoadPtr = Load->getOperand(1).getReg();
4262 Idx = 0;
4263 }
4264
4265 // Don't combine things like a[i], a[i] -> a bigger load.
4266 if (!SeenIdx.insert(Idx).second)
4267 return std::nullopt;
4268
4269 // Every load must share the same base pointer; don't combine things like:
4270 //
4271 // a[i], b[i + 1] -> a bigger load.
4272 if (!BasePtr.isValid())
4273 BasePtr = LoadPtr;
4274 if (BasePtr != LoadPtr)
4275 return std::nullopt;
4276
4277 if (Idx < LowestIdx) {
4278 LowestIdx = Idx;
4279 LowestIdxLoad = Load;
4280 }
4281
4282 // Keep track of the byte offset that this load ends up at. If we have seen
4283 // the byte offset, then stop here. We do not want to combine:
4284 //
4285 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4286 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4287 return std::nullopt;
4288 Loads.insert(Load);
4289
4290 // Keep track of the position of the earliest/latest loads in the pattern.
4291 // We will check that there are no load fold barriers between them later
4292 // on.
4293 //
4294 // FIXME: Is there a better way to check for load fold barriers?
4295 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4296 EarliestLoad = Load;
4297 if (!LatestLoad || dominates(*LatestLoad, *Load))
4298 LatestLoad = Load;
4299 }
4300
4301 // We found a load for each register. Let's check if each load satisfies the
4302 // pattern.
4303 assert(Loads.size() == RegsToVisit.size() &&
4304 "Expected to find a load for each register?");
4305 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4306 LatestLoad && "Expected at least two loads?");
4307
4308 // Check if there are any stores, calls, etc. between any of the loads. If
4309 // there are, then we can't safely perform the combine.
4310 //
4311 // MaxIter is chosen based off the (worst case) number of iterations it
4312 // typically takes to succeed in the LLVM test suite plus some padding.
4313 //
4314 // FIXME: Is there a better way to check for load fold barriers?
4315 const unsigned MaxIter = 20;
4316 unsigned Iter = 0;
4317 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4318 LatestLoad->getIterator())) {
// Loads that are part of the pattern do not count against the budget.
4319 if (Loads.count(&MI))
4320 continue;
4321 if (MI.isLoadFoldBarrier())
4322 return std::nullopt;
4323 if (Iter++ == MaxIter)
4324 return std::nullopt;
4325 }
4326
4327 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4328}
4329
/// Match step for a scalar G_OR \p MI that ORs together several narrow loads
/// from one base pointer so that they form a single wide value. On a match,
/// \p MatchInfo is set to a builder callback that emits one wide load (plus a
/// byte swap when the matched byte order differs from the target's) at the
/// position of the latest narrow load.
/// NOTE(review): the first lines of this definition's signature are missing
/// from this listing.
4332 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4333 assert(MI.getOpcode() == TargetOpcode::G_OR);
4334 MachineFunction &MF = *MI.getMF();
4335 // Assuming a little-endian target, transform:
4336 // s8 *a = ...
4337 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4338 // =>
4339 // s32 val = *((i32)a)
4340 //
4341 // s8 *a = ...
4342 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4343 // =>
4344 // s32 val = BSWAP(*((s32)a))
4345 Register Dst = MI.getOperand(0).getReg();
4346 LLT Ty = MRI.getType(Dst);
4347 if (Ty.isVector())
4348 return false;
4349
4350 // We need to combine at least two loads into this type. Since the smallest
4351 // possible load is into a byte, we need at least a 16-bit wide type.
4352 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4353 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4354 return false;
4355
4356 // Match a collection of non-OR instructions in the pattern.
4357 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4358 if (!RegsToVisit)
4359 return false;
4360
4361 // We have a collection of non-OR instructions. Figure out how wide each of
4362 // the small loads should be based off of the number of potential loads we
4363 // found.
4364 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4365 if (NarrowMemSizeInBits % 8 != 0)
4366 return false;
4367
4368 // Check if each register feeding into each OR is a load from the same
4369 // base pointer + some arithmetic.
4370 //
4371 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4372 //
4373 // Also verify that each of these ends up putting a[i] into the same memory
4374 // offset as a load into a wide type would.
// NOTE(review): the declaration of `MemOffset2Idx` (original line 4375) is
// missing from this listing.
4376 GZExtLoad *LowestIdxLoad, *LatestLoad;
4377 int64_t LowestIdx;
4378 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4379 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4380 if (!MaybeLoadInfo)
4381 return false;
4382 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4383
4384 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4385 // we found before, check if this corresponds to a big or little endian byte
4386 // pattern. If it does, then we can represent it using a load + possibly a
4387 // BSWAP.
4388 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4389 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4390 if (!IsBigEndian)
4391 return false;
4392 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4393 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4394 return false;
4395
4396 // Make sure that the load from the lowest index produces offset 0 in the
4397 // final value.
4398 //
4399 // This ensures that we won't combine something like this:
4400 //
4401 // load x[i] -> byte 2
4402 // load x[i+1] -> byte 0 ---> wide_load x[i]
4403 // load x[i+2] -> byte 1
4404 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4405 const unsigned ZeroByteOffset =
4406 *IsBigEndian
4407 ? bigEndianByteAt(NumLoadsInTy, 0)
4408 : littleEndianByteAt(NumLoadsInTy, 0);
4409 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4410 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4411 ZeroOffsetIdx->second != LowestIdx)
4412 return false;
4413
4414 // We will reuse the pointer from the load which ends up at byte offset 0. It
4415 // may not use index 0.
4416 Register Ptr = LowestIdxLoad->getPointerReg();
4417 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
4418 LegalityQuery::MemDesc MMDesc(MMO);
4419 MMDesc.MemoryTy = Ty;
// NOTE(review): the opening of this legality check (original line 4420) is
// missing from this listing; the line below is its argument.
4421 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4422 return false;
// The new memory operand is derived from the lowest-index load's operand but
// covers the full wide size in bytes.
4423 auto PtrInfo = MMO.getPointerInfo();
4424 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4425
4426 // Load must be allowed and fast on the target.
// NOTE(review): the declaration of `C` (original line 4427) is missing from
// this listing.
4428 auto &DL = MF.getDataLayout();
4429 unsigned Fast = 0;
4430 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4431 !Fast)
4432 return false;
4433
// Captures are by copy; the callback runs later, in the apply step.
4434 MatchInfo = [=](MachineIRBuilder &MIB) {
4435 MIB.setInstrAndDebugLoc(*LatestLoad);
4436 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4437 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4438 if (NeedsBSwap)
4439 MIB.buildBSwap(Dst, LoadDst);
4440 };
4441 return true;
4442}
4443
/// Match step for a scalar phi \p MI whose only non-debug user is an extend.
/// On a match, \p ExtMI points at that extend. For G_ZEXT / G_SEXT every
/// incoming value must be defined (ignoring copies) by a load, truncate,
/// extend or constant, with at most two distinct defining instructions.
/// NOTE(review): the first line of this definition's signature is missing from
/// this listing.
4445 MachineInstr *&ExtMI) const {
4446 auto &PHI = cast<GPhi>(MI);
4447 Register DstReg = PHI.getReg(0);
4448
4449 // TODO: Extending a vector may be expensive, don't do this until heuristics
4450 // are better.
4451 if (MRI.getType(DstReg).isVector())
4452 return false;
4453
4454 // Try to match a phi, whose only use is an extend.
4455 if (!MRI.hasOneNonDBGUse(DstReg))
4456 return false;
4457 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4458 switch (ExtMI->getOpcode()) {
4459 case TargetOpcode::G_ANYEXT:
4460 return true; // G_ANYEXT is usually free.
4461 case TargetOpcode::G_ZEXT:
4462 case TargetOpcode::G_SEXT:
4463 break;
4464 default:
4465 return false;
4466 }
4467
4468 // If the target is likely to fold this extend away, don't propagate.
4469 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4470 return false;
4471
4472 // We don't want to propagate the extends unless there's a good chance that
4473 // they'll be optimized in some way.
4474 // Collect the unique incoming values.
// NOTE(review): the declaration of the set `InSrcs` (original line 4475) is
// missing from this listing.
4476 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4477 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4478 switch (DefMI->getOpcode()) {
4479 case TargetOpcode::G_LOAD:
4480 case TargetOpcode::G_TRUNC:
4481 case TargetOpcode::G_SEXT:
4482 case TargetOpcode::G_ZEXT:
4483 case TargetOpcode::G_ANYEXT:
4484 case TargetOpcode::G_CONSTANT:
4485 InSrcs.insert(DefMI);
4486 // Don't try to propagate if there are too many places to create new
4487 // extends, chances are it'll increase code size.
4488 if (InSrcs.size() > 2)
4489 return false;
4490 break;
4491 default:
4492 return false;
4493 }
4494 }
4495 return true;
4496}
4497
/// Apply step: builds an extend right after the definition of each distinct
/// incoming value of the phi \p MI, then builds a new G_PHI over those
/// extended values that defines the result register of \p ExtMI, and erases
/// \p ExtMI.
/// NOTE(review): the first line of this definition's signature is missing from
/// this listing.
4499 MachineInstr *&ExtMI) const {
4500 auto &PHI = cast<GPhi>(MI);
4501 Register DstReg = ExtMI->getOperand(0).getReg();
4502 LLT ExtTy = MRI.getType(DstReg);
4503
4504 // Propagate the extension into the block of each incoming reg's block.
4505 // Use a SetVector here because PHIs can have duplicate edges, and we want
4506 // deterministic iteration order.
// NOTE(review): the declarations of `SrcMIs` and `OldToNewSrcMap` (original
// lines 4507-4508) are missing from this listing.
4509 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4510 auto SrcReg = PHI.getIncomingValue(I);
4511 auto *SrcMI = MRI.getVRegDef(SrcReg);
// Skip defining instructions that already received an extend.
4512 if (!SrcMIs.insert(SrcMI))
4513 continue;
4514
4515 // Build an extend after each src inst.
4516 auto *MBB = SrcMI->getParent();
4517 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
// If the next instruction is a phi, insert after the block's phis instead.
4518 if (InsertPt != MBB->end() && InsertPt->isPHI())
4519 InsertPt = MBB->getFirstNonPHI();
4520
4521 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4522 Builder.setDebugLoc(MI.getDebugLoc());
4523 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4524 OldToNewSrcMap[SrcMI] = NewExt;
4525 }
4526
4527 // Create a new phi with the extended inputs.
4528 Builder.setInstrAndDebugLoc(MI);
4529 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4530 NewPhi.addDef(DstReg);
// Operand 0 of MI is its def; the remaining operands are copied over, with
// register operands swapped for their extended versions.
4531 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4532 if (!MO.isReg()) {
4533 NewPhi.addMBB(MO.getMBB());
4534 continue;
4535 }
4536 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4537 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4538 }
4539 Builder.insertInstr(NewPhi);
4540 ExtMI->eraseFromParent();
4541}
4542
/// Match step for a G_EXTRACT_VECTOR_ELT \p MI with an in-range constant index
/// whose fixed-length source vector is defined by a G_BUILD_VECTOR or
/// G_BUILD_VECTOR_TRUNC, optionally behind one G_TRUNC. On a match, \p Reg is
/// the build-vector source operand that the index selects.
/// NOTE(review): the first line of this definition's signature is missing from
/// this listing.
4544 Register &Reg) const {
4545 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4546 // If we have a constant index, look for a G_BUILD_VECTOR source
4547 // and find the source register that the index maps to.
4548 Register SrcVec = MI.getOperand(1).getReg();
4549 LLT SrcTy = MRI.getType(SrcVec);
4550 if (SrcTy.isScalableVector())
4551 return false;
4552
4553 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4554 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4555 return false;
4556
4557 unsigned VecIdx = Cst->Value.getZExtValue();
4558
4559 // Check if we have a build_vector or build_vector_trunc with an optional
4560 // trunc in front.
4561 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4562 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4563 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4564 }
4565
4566 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4567 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4568 return false;
4569
// A multi-use source vector is only accepted when the target opts in.
4570 EVT Ty(getMVTForLLT(SrcTy));
4571 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4572 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4573 return false;
4574
// Operand 0 of the build vector is its def, so element I is operand I + 1.
4575 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4576 return true;
4577}
4578
/// Apply step: when the type of \p Reg differs from the result type of \p MI,
/// replaces \p MI with a truncate of \p Reg.
/// NOTE(review): the first line of this definition's signature is missing from
/// this listing.
4580 Register &Reg) const {
4581 // Check the type of the register, since it may have come from a
4582 // G_BUILD_VECTOR_TRUNC.
4583 LLT ScalarTy = MRI.getType(Reg);
4584 Register DstReg = MI.getOperand(0).getReg();
4585 LLT DstTy = MRI.getType(DstReg);
4586
4587 if (ScalarTy != DstTy) {
4588 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4589 Builder.buildTrunc(DstReg, Reg);
4590 MI.eraseFromParent();
4591 return;
4592 }
// NOTE(review): the statement for the equal-type case (original line 4593) is
// missing from this listing.
4594}
4595
/// Match step for a G_BUILD_VECTOR \p MI whose non-debug users are all
/// G_EXTRACT_VECTOR_ELT with in-range constant indices, and where every
/// element is extracted at least once. Each visited extract is appended to
/// \p SrcDstPairs as (build-vector source register, extract instruction).
/// NOTE(review): the first lines of this definition's signature are missing
/// from this listing.
4598 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4599 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4600 // This combine tries to find build_vector's which have every source element
4601 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4602 // the masked load scalarization is run late in the pipeline. There's already
4603 // a combine for a similar pattern starting from the extract, but that
4604 // doesn't attempt to do it if there are multiple uses of the build_vector,
4605 // which in this case is true. Starting the combine from the build_vector
4606 // feels more natural than trying to find sibling nodes of extracts.
4607 // E.g.
4608 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4609 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4610 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4611 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4612 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4613 // ==>
4614 // replace ext{1,2,3,4} with %s{1,2,3,4}
4615
4616 Register DstReg = MI.getOperand(0).getReg();
4617 LLT DstTy = MRI.getType(DstReg);
4618 unsigned NumElts = DstTy.getNumElements();
4619
// One bit per element, set once an extract of that element has been seen.
4620 SmallBitVector ExtractedElts(NumElts);
4621 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4622 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4623 return false;
4624 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4625 if (!Cst)
4626 return false;
4627 unsigned Idx = Cst->getZExtValue();
4628 if (Idx >= NumElts)
4629 return false; // Out of range.
4630 ExtractedElts.set(Idx);
4631 SrcDstPairs.emplace_back(
4632 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4633 }
4634 // Match if every element was extracted.
4635 return ExtractedElts.all();
4636}
4637
/// Apply step: for each (source register, extract instruction) pair, replaces
/// the extract's result with the source register and erases the extract, then
/// erases the G_BUILD_VECTOR \p MI.
/// NOTE(review): the first lines of this definition's signature are missing
/// from this listing.
4640 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4641 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4642 for (auto &Pair : SrcDstPairs) {
4643 auto *ExtMI = Pair.second;
4644 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4645 ExtMI->eraseFromParent();
4646 }
4647 MI.eraseFromParent();
4648}
4649
/// Apply step: runs \p MatchInfo through applyBuildFnNoErase, then erases
/// \p MI.
/// NOTE(review): the first lines of this definition's signature are missing
/// from this listing.
4652 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4653 applyBuildFnNoErase(MI, MatchInfo);
4654 MI.eraseFromParent();
4655}
4656
/// Apply step: invokes the \p MatchInfo callback with this helper's Builder and
/// does nothing else; in particular no instruction is erased here.
/// NOTE(review): the first lines of this definition's signature are missing
/// from this listing.
4659 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4660 MatchInfo(Builder);
4661}
4662
/// Match step for a G_OR \p MI of a shl and an lshr whose shift amounts add up
/// to the scalar bit width. On a match, \p MatchInfo is set to a builder
/// callback that emits the chosen funnel shift (G_FSHL or G_FSHR) into the
/// destination of \p MI.
/// NOTE(review): the first line of this definition's signature is missing from
/// this listing.
4664 bool AllowScalarConstants,
4665 BuildFnTy &MatchInfo) const {
4666 assert(MI.getOpcode() == TargetOpcode::G_OR);
4667
4668 Register Dst = MI.getOperand(0).getReg();
4669 LLT Ty = MRI.getType(Dst);
4670 unsigned BitWidth = Ty.getScalarSizeInBits();
4671
4672 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4673 unsigned FshOpc = 0;
4674
4675 // Match (or (shl ...), (lshr ...)).
4676 if (!mi_match(Dst, MRI,
4677 // m_GOr() handles the commuted version as well.
4678 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4679 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4680 return false;
4681
4682 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4683 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4684 int64_t CstShlAmt = 0, CstLShrAmt;
4685 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4686 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4687 CstShlAmt + CstLShrAmt == BitWidth) {
// Of the two equivalent forms, the constant case picks fshr with C1.
4688 FshOpc = TargetOpcode::G_FSHR;
4689 Amt = LShrAmt;
4690 } else if (mi_match(LShrAmt, MRI,
// NOTE(review): the pattern argument (original line 4691) is missing from
// this listing; per the comment below it matches (sub bw, amt).
4692 ShlAmt == Amt) {
4693 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4694 FshOpc = TargetOpcode::G_FSHL;
4695 } else if (mi_match(ShlAmt, MRI,
// NOTE(review): the pattern argument (original line 4696) is missing from
// this listing; per the comment below it matches (sub bw, amt).
4697 LShrAmt == Amt) {
4698 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4699 FshOpc = TargetOpcode::G_FSHR;
4700 } else {
4701 return false;
4702 }
4703
// A funnel shift that fails the legality query is still accepted when
// AllowScalarConstants is set, Ty is scalar, and CstShlAmt is non-zero.
4704 LLT AmtTy = MRI.getType(Amt);
4705 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
4706 (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
4707 return false;
4708
4709 MatchInfo = [=](MachineIRBuilder &B) {
4710 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4711 };
4712 return true;
4713}
4714
4715 /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
/// That is the case when both shifted operands are the same register and the
/// corresponding rotate (G_ROTL for G_FSHL, G_ROTR for G_FSHR) passes
/// isLegalOrBeforeLegalizer.
/// NOTE(review): the signature line of this definition is missing from this
/// listing.
4717 unsigned Opc = MI.getOpcode();
4718 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4719 Register X = MI.getOperand(1).getReg();
4720 Register Y = MI.getOperand(2).getReg();
4721 if (X != Y)
4722 return false;
4723 unsigned RotateOpc =
4724 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4725 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4726}
4727
/// Apply step: mutates the funnel shift \p MI in place into the matching
/// rotate (G_FSHL -> G_ROTL, G_FSHR -> G_ROTR) and drops operand 2.
/// NOTE(review): the signature line of this definition is missing from this
/// listing.
4729 unsigned Opc = MI.getOpcode();
4730 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4731 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4732 Observer.changingInstr(MI);
4733 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4734 : TargetOpcode::G_ROTR));
// Removing operand 2 leaves (dst, src, amount).
4735 MI.removeOperand(2);
4736 Observer.changedInstr(MI);
4737}
4738
4739// Fold (rot x, c) -> (rot x, c % BitSize)
4741 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4742 MI.getOpcode() == TargetOpcode::G_ROTR);
4743 unsigned Bitsize =
4744 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4745 Register AmtReg = MI.getOperand(2).getReg();
4746 bool OutOfRange = false;
4747 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4748 if (auto *CI = dyn_cast<ConstantInt>(C))
4749 OutOfRange |= CI->getValue().uge(Bitsize);
4750 return true;
4751 };
4752 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4753}
4754
4756 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4757 MI.getOpcode() == TargetOpcode::G_ROTR);
4758 unsigned Bitsize =
4759 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4760 Register Amt = MI.getOperand(2).getReg();
4761 LLT AmtTy = MRI.getType(Amt);
4762 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4763 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4764 Observer.changingInstr(MI);
4765 MI.getOperand(2).setReg(Amt);
4766 Observer.changedInstr(MI);
4767}
4768
4770 int64_t &MatchInfo) const {
4771 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4772 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4773
4774 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4775 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4776 // KnownBits on the LHS in two cases:
4777 //
4778 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4779 // we cannot do any transforms so we can safely bail out early.
4780 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4781 // >=0.
4782 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4783 if (KnownRHS.isUnknown())
4784 return false;
4785
4786 std::optional<bool> KnownVal;
4787 if (KnownRHS.isZero()) {
4788 // ? uge 0 -> always true
4789 // ? ult 0 -> always false
4790 if (Pred == CmpInst::ICMP_UGE)
4791 KnownVal = true;
4792 else if (Pred == CmpInst::ICMP_ULT)
4793 KnownVal = false;
4794 }
4795
4796 if (!KnownVal) {
4797 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4798 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4799 }
4800
4801 if (!KnownVal)
4802 return false;
4803 MatchInfo =
4804 *KnownVal
4806 /*IsVector = */
4807 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4808 /* IsFP = */ false)
4809 : 0;
4810 return true;
4811}
4812
4815 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4816 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4817 // Given:
4818 //
4819 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4820 // %cmp = G_ICMP ne %x, 0
4821 //
4822 // Or:
4823 //
4824 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4825 // %cmp = G_ICMP eq %x, 1
4826 //
4827 // We can replace %cmp with %x assuming true is 1 on the target.
4828 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4829 if (!CmpInst::isEquality(Pred))
4830 return false;
4831 Register Dst = MI.getOperand(0).getReg();
4832 LLT DstTy = MRI.getType(Dst);
4834 /* IsFP = */ false) != 1)
4835 return false;
4836 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4837 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4838 return false;
4839 Register LHS = MI.getOperand(2).getReg();
4840 auto KnownLHS = VT->getKnownBits(LHS);
4841 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4842 return false;
4843 // Make sure replacing Dst with the LHS is a legal operation.
4844 LLT LHSTy = MRI.getType(LHS);
4845 unsigned LHSSize = LHSTy.getSizeInBits();
4846 unsigned DstSize = DstTy.getSizeInBits();
4847 unsigned Op = TargetOpcode::COPY;
4848 if (DstSize != LHSSize)
4849 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4850 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4851 return false;
4852 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4853 return true;
4854}
4855
4856// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4859 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4860 assert(MI.getOpcode() == TargetOpcode::G_AND);
4861
4862 // Ignore vector types to simplify matching the two constants.
4863 // TODO: do this for vectors and scalars via a demanded bits analysis.
4864 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4865 if (Ty.isVector())
4866 return false;
4867
4868 Register Src;
4869 Register AndMaskReg;
4870 int64_t AndMaskBits;
4871 int64_t OrMaskBits;
4872 if (!mi_match(MI, MRI,
4873 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4874 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4875 return false;
4876
4877 // Check if OrMask could turn on any bits in Src.
4878 if (AndMaskBits & OrMaskBits)
4879 return false;
4880
4881 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4882 Observer.changingInstr(MI);
4883 // Canonicalize the result to have the constant on the RHS.
4884 if (MI.getOperand(1).getReg() == AndMaskReg)
4885 MI.getOperand(2).setReg(AndMaskReg);
4886 MI.getOperand(1).setReg(Src);
4887 Observer.changedInstr(MI);
4888 };
4889 return true;
4890}
4891
4892/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4895 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4896 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4897 Register Dst = MI.getOperand(0).getReg();
4898 Register Src = MI.getOperand(1).getReg();
4899 LLT Ty = MRI.getType(Src);
4901 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4902 return false;
4903 int64_t Width = MI.getOperand(2).getImm();
4904 Register ShiftSrc;
4905 int64_t ShiftImm;
4906 if (!mi_match(
4907 Src, MRI,
4908 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4909 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4910 return false;
4911 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4912 return false;
4913
4914 MatchInfo = [=](MachineIRBuilder &B) {
4915 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4916 auto Cst2 = B.buildConstant(ExtractTy, Width);
4917 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4918 };
4919 return true;
4920}
4921
4922/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4924 BuildFnTy &MatchInfo) const {
4925 GAnd *And = cast<GAnd>(&MI);
4926 Register Dst = And->getReg(0);
4927 LLT Ty = MRI.getType(Dst);
4929 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4930 // into account.
4931 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4932 return false;
4933
4934 int64_t AndImm, LSBImm;
4935 Register ShiftSrc;
4936 const unsigned Size = Ty.getScalarSizeInBits();
4937 if (!mi_match(And->getReg(0), MRI,
4938 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4939 m_ICst(AndImm))))
4940 return false;
4941
4942 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4943 auto MaybeMask = static_cast<uint64_t>(AndImm);
4944 if (MaybeMask & (MaybeMask + 1))
4945 return false;
4946
4947 // LSB must fit within the register.
4948 if (static_cast<uint64_t>(LSBImm) >= Size)
4949 return false;
4950
4951 uint64_t Width = APInt(Size, AndImm).countr_one();
4952 MatchInfo = [=](MachineIRBuilder &B) {
4953 auto WidthCst = B.buildConstant(ExtractTy, Width);
4954 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4955 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4956 };
4957 return true;
4958}
4959
4962 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4963 const unsigned Opcode = MI.getOpcode();
4964 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4965
4966 const Register Dst = MI.getOperand(0).getReg();
4967
4968 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4969 ? TargetOpcode::G_SBFX
4970 : TargetOpcode::G_UBFX;
4971
4972 // Check if the type we would use for the extract is legal
4973 LLT Ty = MRI.getType(Dst);
4975 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4976 return false;
4977
4978 Register ShlSrc;
4979 int64_t ShrAmt;
4980 int64_t ShlAmt;
4981 const unsigned Size = Ty.getScalarSizeInBits();
4982
4983 // Try to match shr (shl x, c1), c2
4984 if (!mi_match(Dst, MRI,
4985 m_BinOp(Opcode,
4986 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4987 m_ICst(ShrAmt))))
4988 return false;
4989
4990 // Make sure that the shift sizes can fit a bitfield extract
4991 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4992 return false;
4993
4994 // Skip this combine if the G_SEXT_INREG combine could handle it
4995 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4996 return false;
4997
4998 // Calculate start position and width of the extract
4999 const int64_t Pos = ShrAmt - ShlAmt;
5000 const int64_t Width = Size - ShrAmt;
5001
5002 MatchInfo = [=](MachineIRBuilder &B) {
5003 auto WidthCst = B.buildConstant(ExtractTy, Width);
5004 auto PosCst = B.buildConstant(ExtractTy, Pos);
5005 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
5006 };
5007 return true;
5008}
5009
5012 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5013 const unsigned Opcode = MI.getOpcode();
5014 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
5015
5016 const Register Dst = MI.getOperand(0).getReg();
5017 LLT Ty = MRI.getType(Dst);
5019 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
5020 return false;
5021
5022 // Try to match shr (and x, c1), c2
5023 Register AndSrc;
5024 int64_t ShrAmt;
5025 int64_t SMask;
5026 if (!mi_match(Dst, MRI,
5027 m_BinOp(Opcode,
5028 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
5029 m_ICst(ShrAmt))))
5030 return false;
5031
5032 const unsigned Size = Ty.getScalarSizeInBits();
5033 if (ShrAmt < 0 || ShrAmt >= Size)
5034 return false;
5035
5036 // If the shift subsumes the mask, emit the 0 directly.
5037 if (0 == (SMask >> ShrAmt)) {
5038 MatchInfo = [=](MachineIRBuilder &B) {
5039 B.buildConstant(Dst, 0);
5040 };
5041 return true;
5042 }
5043
5044 // Check that ubfx can do the extraction, with no holes in the mask.
5045 uint64_t UMask = SMask;
5046 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
5048 if (!isMask_64(UMask))
5049 return false;
5050
5051 // Calculate start position and width of the extract.
5052 const int64_t Pos = ShrAmt;
5053 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
5054
5055 // It's preferable to keep the shift, rather than form G_SBFX.
5056 // TODO: remove the G_AND via demanded bits analysis.
5057 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
5058 return false;
5059
5060 MatchInfo = [=](MachineIRBuilder &B) {
5061 auto WidthCst = B.buildConstant(ExtractTy, Width);
5062 auto PosCst = B.buildConstant(ExtractTy, Pos);
5063 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
5064 };
5065 return true;
5066}
5067
5068bool CombinerHelper::reassociationCanBreakAddressingModePattern(
5069 MachineInstr &MI) const {
5070 auto &PtrAdd = cast<GPtrAdd>(MI);
5071
5072 Register Src1Reg = PtrAdd.getBaseReg();
5073 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
5074 if (!Src1Def)
5075 return false;
5076
5077 Register Src2Reg = PtrAdd.getOffsetReg();
5078
5079 if (MRI.hasOneNonDBGUse(Src1Reg))
5080 return false;
5081
5082 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
5083 if (!C1)
5084 return false;
5085 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5086 if (!C2)
5087 return false;
5088
5089 const APInt &C1APIntVal = *C1;
5090 const APInt &C2APIntVal = *C2;
5091 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
5092
5093 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
5094 // This combine may end up running before ptrtoint/inttoptr combines
5095 // manage to eliminate redundant conversions, so try to look through them.
5096 MachineInstr *ConvUseMI = &UseMI;
5097 unsigned ConvUseOpc = ConvUseMI->getOpcode();
5098 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
5099 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
5100 Register DefReg = ConvUseMI->getOperand(0).getReg();
5101 if (!MRI.hasOneNonDBGUse(DefReg))
5102 break;
5103 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
5104 ConvUseOpc = ConvUseMI->getOpcode();
5105 }
5106 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
5107 if (!LdStMI)
5108 continue;
5109 // Is x[offset2] already not a legal addressing mode? If so then
5110 // reassociating the constants breaks nothing (we test offset2 because
5111 // that's the one we hope to fold into the load or store).
5112 TargetLoweringBase::AddrMode AM;
5113 AM.HasBaseReg = true;
5114 AM.BaseOffs = C2APIntVal.getSExtValue();
5115 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
5116 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
5117 PtrAdd.getMF()->getFunction().getContext());
5118 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
5119 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5120 AccessTy, AS))
5121 continue;
5122
5123 // Would x[offset1+offset2] still be a legal addressing mode?
5124 AM.BaseOffs = CombinedValue;
5125 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5126 AccessTy, AS))
5127 return true;
5128 }
5129
5130 return false;
5131}
5132
5134 MachineInstr *RHS,
5135 BuildFnTy &MatchInfo) const {
5136 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5137 Register Src1Reg = MI.getOperand(1).getReg();
5138 if (RHS->getOpcode() != TargetOpcode::G_ADD)
5139 return false;
5140 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
5141 if (!C2)
5142 return false;
5143
5144 // If both additions are nuw, the reassociated additions are also nuw.
5145 // If the original G_PTR_ADD is additionally nusw, X and C are both not
5146 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
5147 // therefore also nusw.
5148 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
5149 // the new G_PTR_ADDs are then also inbounds.
5150 unsigned PtrAddFlags = MI.getFlags();
5151 unsigned AddFlags = RHS->getFlags();
5152 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
5153 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
5154 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
5155 unsigned Flags = 0;
5156 if (IsNoUWrap)
5158 if (IsNoUSWrap)
5160 if (IsInBounds)
5162
5163 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5164 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
5165
5166 auto NewBase =
5167 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
5168 Observer.changingInstr(MI);
5169 MI.getOperand(1).setReg(NewBase.getReg(0));
5170 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
5171 MI.setFlags(Flags);
5172 Observer.changedInstr(MI);
5173 };
5174 return !reassociationCanBreakAddressingModePattern(MI);
5175}
5176
5178 MachineInstr *LHS,
5179 MachineInstr *RHS,
5180 BuildFnTy &MatchInfo) const {
5181 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
5182 // if and only if (G_PTR_ADD X, C) has one use.
5183 Register LHSBase;
5184 std::optional<ValueAndVReg> LHSCstOff;
5185 if (!mi_match(MI.getBaseReg(), MRI,
5186 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
5187 return false;
5188
5189 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5190
5191 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5192 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5193 // so the new G_PTR_ADDs are also inbounds.
5194 unsigned PtrAddFlags = MI.getFlags();
5195 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5196 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5197 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5199 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5201 unsigned Flags = 0;
5202 if (IsNoUWrap)
5204 if (IsNoUSWrap)
5206 if (IsInBounds)
5208
5209 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5210 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5211 // before its def. Sink the instruction so the outer PTR_ADD to ensure this
5212 // doesn't happen.
5213 LHSPtrAdd->moveBefore(&MI);
5214 Register RHSReg = MI.getOffsetReg();
5215 // set VReg will cause type mismatch if it comes from extend/trunc
5216 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
5217 Observer.changingInstr(MI);
5218 MI.getOperand(2).setReg(NewCst.getReg(0));
5219 MI.setFlags(Flags);
5220 Observer.changedInstr(MI);
5221 Observer.changingInstr(*LHSPtrAdd);
5222 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5223 LHSPtrAdd->setFlags(Flags);
5224 Observer.changedInstr(*LHSPtrAdd);
5225 };
5226 return !reassociationCanBreakAddressingModePattern(MI);
5227}
5228
5230 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
5231 BuildFnTy &MatchInfo) const {
5232 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5233 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5234 if (!LHSPtrAdd)
5235 return false;
5236
5237 Register Src2Reg = MI.getOperand(2).getReg();
5238 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5239 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5240 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5241 if (!C1)
5242 return false;
5243 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5244 if (!C2)
5245 return false;
5246
5247 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5248 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5249 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5250 // largest signed integer that fits into the index type, which is the maximum
5251 // size of allocated objects according to the IR Language Reference.
5252 unsigned PtrAddFlags = MI.getFlags();
5253 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5254 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5255 bool IsInBounds =
5256 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5257 unsigned Flags = 0;
5258 if (IsNoUWrap)
5260 if (IsInBounds) {
5263 }
5264
5265 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5266 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5267 Observer.changingInstr(MI);
5268 MI.getOperand(1).setReg(LHSSrc1);
5269 MI.getOperand(2).setReg(NewCst.getReg(0));
5270 MI.setFlags(Flags);
5271 Observer.changedInstr(MI);
5272 };
5273 return !reassociationCanBreakAddressingModePattern(MI);
5274}
5275
5277 BuildFnTy &MatchInfo) const {
5278 auto &PtrAdd = cast<GPtrAdd>(MI);
5279 // We're trying to match a few pointer computation patterns here for
5280 // re-association opportunities.
5281 // 1) Isolating a constant operand to be on the RHS, e.g.:
5282 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5283 //
5284 // 2) Folding two constants in each sub-tree as long as such folding
5285 // doesn't break a legal addressing mode.
5286 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5287 //
5288 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5289 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
5290 // iif (G_PTR_ADD X, C) has one use.
5291 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5292 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5293
5294 // Try to match example 2.
5295 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5296 return true;
5297
5298 // Try to match example 3.
5299 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5300 return true;
5301
5302 // Try to match example 1.
5303 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5304 return true;
5305
5306 return false;
5307}
5309 Register OpLHS, Register OpRHS,
5310 BuildFnTy &MatchInfo) const {
5311 LLT OpRHSTy = MRI.getType(OpRHS);
5312 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5313
5314 if (OpLHSDef->getOpcode() != Opc)
5315 return false;
5316
5317 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5318 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5319 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5320
5321 // If the inner op is (X op C), pull the constant out so it can be folded with
5322 // other constants in the expression tree. Folding is not guaranteed so we
5323 // might have (C1 op C2). In that case do not pull a constant out because it
5324 // won't help and can lead to infinite loops.
5325 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5326 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5327 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5328 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5329 MatchInfo = [=](MachineIRBuilder &B) {
5330 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5331 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5332 };
5333 return true;
5334 }
5335 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5336 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5337 // iff (op x, c1) has one use
5338 MatchInfo = [=](MachineIRBuilder &B) {
5339 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5340 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5341 };
5342 return true;
5343 }
5344 }
5345
5346 return false;
5347}
5348
5350 BuildFnTy &MatchInfo) const {
5351 // We don't check if the reassociation will break a legal addressing mode
5352 // here since pointer arithmetic is handled by G_PTR_ADD.
5353 unsigned Opc = MI.getOpcode();
5354 Register DstReg = MI.getOperand(0).getReg();
5355 Register LHSReg = MI.getOperand(1).getReg();
5356 Register RHSReg = MI.getOperand(2).getReg();
5357
5358 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5359 return true;
5360 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5361 return true;
5362 return false;
5363}
5364
5366 APInt &MatchInfo) const {
5367 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5368 Register SrcOp = MI.getOperand(1).getReg();
5369
5370 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5371 MatchInfo = *MaybeCst;
5372 return true;
5373 }
5374
5375 return false;
5376}
5377
5379 BuildFnTy &MatchInfo) const {
5380 Register Dst = MI.getOperand(0).getReg();
5381 auto Csts = ConstantFoldUnaryIntOp(MI.getOpcode(), MRI.getType(Dst),
5382 MI.getOperand(1).getReg(), MRI);
5383 if (Csts.empty())
5384 return false;
5385
5386 MatchInfo = [Dst, Csts = std::move(Csts)](MachineIRBuilder &B) {
5387 if (Csts.size() == 1)
5388 B.buildConstant(Dst, Csts[0]);
5389 else
5390 B.buildBuildVectorConstant(Dst, Csts);
5391 };
5392 return true;
5393}
5394
5396 APInt &MatchInfo) const {
5397 Register Op1 = MI.getOperand(1).getReg();
5398 Register Op2 = MI.getOperand(2).getReg();
5399 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5400 if (!MaybeCst)
5401 return false;
5402 MatchInfo = *MaybeCst;
5403 return true;
5404}
5405
5407 ConstantFP *&MatchInfo) const {
5408 Register Op1 = MI.getOperand(1).getReg();
5409 Register Op2 = MI.getOperand(2).getReg();
5410 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5411 if (!MaybeCst)
5412 return false;
5413 MatchInfo =
5414 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5415 return true;
5416}
5417
5419 ConstantFP *&MatchInfo) const {
5420 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5421 MI.getOpcode() == TargetOpcode::G_FMAD);
5422 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5423
5424 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5425 if (!Op3Cst)
5426 return false;
5427
5428 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5429 if (!Op2Cst)
5430 return false;
5431
5432 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5433 if (!Op1Cst)
5434 return false;
5435
5436 APFloat Op1F = Op1Cst->getValueAPF();
5437 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5439 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5440 return true;
5441}
5442
5445 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5446 // Look for a binop feeding into an AND with a mask:
5447 //
5448 // %add = G_ADD %lhs, %rhs
5449 // %and = G_AND %add, 000...11111111
5450 //
5451 // Check if it's possible to perform the binop at a narrower width and zext
5452 // back to the original width like so:
5453 //
5454 // %narrow_lhs = G_TRUNC %lhs
5455 // %narrow_rhs = G_TRUNC %rhs
5456 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5457 // %new_add = G_ZEXT %narrow_add
5458 // %and = G_AND %new_add, 000...11111111
5459 //
5460 // This can allow later combines to eliminate the G_AND if it turns out
5461 // that the mask is irrelevant.
5462 assert(MI.getOpcode() == TargetOpcode::G_AND);
5463 Register Dst = MI.getOperand(0).getReg();
5464 Register AndLHS = MI.getOperand(1).getReg();
5465 Register AndRHS = MI.getOperand(2).getReg();
5466 LLT WideTy = MRI.getType(Dst);
5467
5468 // If the potential binop has more than one use, then it's possible that one
5469 // of those uses will need its full width.
5470 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5471 return false;
5472
5473 // Check if the LHS feeding the AND is impacted by the high bits that we're
5474 // masking out.
5475 //
5476 // e.g. for 64-bit x, y:
5477 //
5478 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5479 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5480 if (!LHSInst)
5481 return false;
5482 unsigned LHSOpc = LHSInst->getOpcode();
5483 switch (LHSOpc) {
5484 default:
5485 return false;
5486 case TargetOpcode::G_ADD:
5487 case TargetOpcode::G_SUB:
5488 case TargetOpcode::G_MUL:
5489 case TargetOpcode::G_AND:
5490 case TargetOpcode::G_OR:
5491 case TargetOpcode::G_XOR:
5492 break;
5493 }
5494
5495 // Find the mask on the RHS.
5496 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5497 if (!Cst)
5498 return false;
5499 auto Mask = Cst->Value;
5500 if (!Mask.isMask())
5501 return false;
5502
5503 // No point in combining if there's nothing to truncate.
5504 unsigned NarrowWidth = Mask.countr_one();
5505 if (NarrowWidth == WideTy.getSizeInBits())
5506 return false;
5507 LLT NarrowTy = LLT::integer(NarrowWidth);
5508
5509 // Check if adding the zext + truncates could be harmful.
5510 auto &MF = *MI.getMF();
5511 const auto &TLI = getTargetLowering();
5512 LLVMContext &Ctx = MF.getFunction().getContext();
5513 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5514 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5515 return false;
5516 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5517 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5518 return false;
5519 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5520 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5521 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5522 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5523 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5524 auto NarrowBinOp =
5525 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5526 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5527 Observer.changingInstr(MI);
5528 MI.getOperand(1).setReg(Ext.getReg(0));
5529 Observer.changedInstr(MI);
5530 };
5531 return true;
5532}
5533
5535 BuildFnTy &MatchInfo) const {
5536 unsigned Opc = MI.getOpcode();
5537 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5538
5539 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5540 return false;
5541
5542 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5543 Observer.changingInstr(MI);
5544 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5545 : TargetOpcode::G_SADDO;
5546 MI.setDesc(Builder.getTII().get(NewOpc));
5547 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5548 Observer.changedInstr(MI);
5549 };
5550 return true;
5551}
5552
5554 BuildFnTy &MatchInfo) const {
5555 // (G_*MULO x, 0) -> 0 + no carry out
5556 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5557 MI.getOpcode() == TargetOpcode::G_SMULO);
5558 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5559 return false;
5560 Register Dst = MI.getOperand(0).getReg();
5561 Register Carry = MI.getOperand(1).getReg();
5562 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5563 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5564 return false;
5565 MatchInfo = [=](MachineIRBuilder &B) {
5566 B.buildConstant(Dst, 0);
5567 B.buildConstant(Carry, 0);
5568 };
5569 return true;
5570}
5571
5573 BuildFnTy &MatchInfo) const {
5574 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5575 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5576 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5577 MI.getOpcode() == TargetOpcode::G_SADDE ||
5578 MI.getOpcode() == TargetOpcode::G_USUBE ||
5579 MI.getOpcode() == TargetOpcode::G_SSUBE);
5580 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5581 return false;
5582 MatchInfo = [&](MachineIRBuilder &B) {
5583 unsigned NewOpcode;
5584 switch (MI.getOpcode()) {
5585 case TargetOpcode::G_UADDE:
5586 NewOpcode = TargetOpcode::G_UADDO;
5587 break;
5588 case TargetOpcode::G_SADDE:
5589 NewOpcode = TargetOpcode::G_SADDO;
5590 break;
5591 case TargetOpcode::G_USUBE:
5592 NewOpcode = TargetOpcode::G_USUBO;
5593 break;
5594 case TargetOpcode::G_SSUBE:
5595 NewOpcode = TargetOpcode::G_SSUBO;
5596 break;
5597 }
5598 Observer.changingInstr(MI);
5599 MI.setDesc(B.getTII().get(NewOpcode));
5600 MI.removeOperand(4);
5601 Observer.changedInstr(MI);
5602 };
5603 return true;
5604}
5605
5607 BuildFnTy &MatchInfo) const {
5608 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5609 Register Dst = MI.getOperand(0).getReg();
5610 // (x + y) - z -> x (if y == z)
5611 // (x + y) - z -> y (if x == z)
5612 Register X, Y, Z;
5613 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5614 Register ReplaceReg;
5615 int64_t CstX, CstY;
5616 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5618 ReplaceReg = X;
5619 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5621 ReplaceReg = Y;
5622 if (ReplaceReg) {
5623 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5624 return true;
5625 }
5626 }
5627
5628 // x - (y + z) -> 0 - y (if x == z)
5629 // x - (y + z) -> 0 - z (if x == y)
5630 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5631 Register ReplaceReg;
5632 int64_t CstX;
5633 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5635 ReplaceReg = Y;
5636 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5638 ReplaceReg = Z;
5639 if (ReplaceReg) {
5640 MatchInfo = [=](MachineIRBuilder &B) {
5641 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5642 B.buildSub(Dst, Zero, ReplaceReg);
5643 };
5644 return true;
5645 }
5646 }
5647 return false;
5648}
5649
// Expands a G_UDIV/G_UREM whose divisor is a constant (scalar or build
// vector) into shift/multiply sequences emitted through Builder, and returns
// the last instruction built.
//  - If MI carries the IsExact flag: optional exact lshr by the divisor's
//    trailing-zero count, then a multiply by the multiplicative inverse of the
//    remaining odd part.
//  - Otherwise: optional pre-shift, G_UMULH by a "magic" constant, optional
//    NPQ fixup, post-shift, and a select that yields LHS where the divisor is
//    compared against 1. G_UREM is finished as LHS - Q * RHS.
// NOTE(review): the signature line (listing line 5650) is absent from this
// excerpt.
5651 unsigned Opcode = MI.getOpcode();
5652 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5653 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5654 Register Dst = UDivorRem.getReg(0);
5655 Register LHS = UDivorRem.getReg(1);
5656 Register RHS = UDivorRem.getReg(2);
5657 LLT Ty = MRI.getType(Dst);
5658 LLT ScalarTy = Ty.getScalarType();
5659 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
// NOTE(review): listing line 5660 is missing here; it presumably declares
// ShiftAmtTy, which the next line reads -- confirm against the original file.
5661 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5662
5663 auto &MIB = Builder;
5664
5665 bool UseSRL = false;
5666 SmallVector<Register, 16> Shifts, Factors;
5667 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5668 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5669
// Per-element callback for the exact path: records the shift amount and the
// inverse factor for one divisor element.
5670 auto BuildExactUDIVPattern = [&](const Constant *C) {
5671 // Don't recompute inverses for each splat element.
5672 if (IsSplat && !Factors.empty()) {
5673 Shifts.push_back(Shifts[0]);
5674 Factors.push_back(Factors[0]);
5675 return true;
5676 }
5677
5678 auto *CI = cast<ConstantInt>(C);
5679 APInt Divisor = CI->getValue();
// Strip the trailing zero bits from the divisor; they are applied as a
// logical right shift of the dividend instead.
5680 unsigned Shift = Divisor.countr_zero();
5681 if (Shift) {
5682 Divisor.lshrInPlace(Shift);
5683 UseSRL = true;
5684 }
5685
5686 // Calculate the multiplicative inverse modulo BW.
5687 APInt Factor = Divisor.multiplicativeInverse();
5688 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5689 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5690 return true;
5691 };
5692
5693 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5694 // Collect all magic values from the build vector.
5695 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5696 llvm_unreachable("Expected unary predicate match to succeed");
5697
5698 Register Shift, Factor;
5699 if (Ty.isVector()) {
5700 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5701 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5702 } else {
5703 Shift = Shifts[0];
5704 Factor = Factors[0];
5705 }
5706
5707 Register Res = LHS;
5708
// The shift is only emitted when at least one element had trailing zeros.
5709 if (UseSRL)
5710 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5711
5712 return MIB.buildMul(Ty, Res, Factor);
5713 }
5714
// Minimum number of leading zero bits known for the dividend; 0 when VT is
// null.
5715 unsigned KnownLeadingZeros =
5716 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5717
5718 bool UseNPQ = false;
5719 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
// Per-element callback for the general path: records pre-shift, magic
// multiplier, NPQ factor and post-shift constants for one divisor element.
5720 auto BuildUDIVPattern = [&](const Constant *C) {
5721 auto *CI = cast<ConstantInt>(C);
5722 const APInt &Divisor = CI->getValue();
5723
5724 bool SelNPQ = false;
5725 APInt Magic(Divisor.getBitWidth(), 0);
5726 unsigned PreShift = 0, PostShift = 0;
5727
5728 // Magic algorithm doesn't work for division by 1. We need to emit a select
5729 // at the end.
5730 // TODO: Use undef values for divisor of 1.
5731 if (!Divisor.isOne()) {
5732
5733 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5734 // in the dividend exceeds the leading zeros for the divisor.
// NOTE(review): listing lines 5735-5736 are missing here; they presumably
// begin the declaration of `magics` whose arguments continue on the next
// line -- confirm against the original file.
5737 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5738
5739 Magic = std::move(magics.Magic);
5740
5741 assert(magics.PreShift < Divisor.getBitWidth() &&
5742 "We shouldn't generate an undefined shift!");
5743 assert(magics.PostShift < Divisor.getBitWidth() &&
5744 "We shouldn't generate an undefined shift!");
5745 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5746 PreShift = magics.PreShift;
5747 PostShift = magics.PostShift;
5748 SelNPQ = magics.IsAdd;
5749 }
5750
5751 PreShifts.push_back(
5752 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5753 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
// The NPQ factor has only the top bit set for elements that need the add
// fixup and is zero for all other elements.
5754 NPQFactors.push_back(
5755 MIB.buildConstant(ScalarTy,
5756 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5757 : APInt::getZero(EltBits))
5758 .getReg(0));
5759 PostShifts.push_back(
5760 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5761 UseNPQ |= SelNPQ;
5762 return true;
5763 };
5764
5765 // Collect the shifts/magic values from each element.
5766 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5767 (void)Matched;
5768 assert(Matched && "Expected unary predicate match to succeed");
5769
5770 Register PreShift, PostShift, MagicFactor, NPQFactor;
5771 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5772 if (RHSDef) {
5773 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5774 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5775 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5776 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5777 } else {
// NPQFactor is not assigned for scalars; it is only read under
// Ty.isVector() below.
5778 assert(MRI.getType(RHS).isScalar() &&
5779 "Non-build_vector operation should have been a scalar");
5780 PreShift = PreShifts[0];
5781 MagicFactor = MagicFactors[0];
5782 PostShift = PostShifts[0];
5783 }
5784
5785 Register Q = LHS;
5786 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5787
5788 // Multiply the numerator (operand 0) by the magic value.
5789 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5790
5791 if (UseNPQ) {
5792 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5793
5794 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5795 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5796 if (Ty.isVector())
5797 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5798 else
5799 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5800
5801 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5802 }
5803
5804 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
// Elements whose divisor is 1 skipped the magic computation above, so the
// select below picks LHS for them instead of Q.
5805 auto One = MIB.buildConstant(Ty, 1);
5806 auto IsOne = MIB.buildICmp(
// NOTE(review): listing line 5807 (the first buildICmp argument, presumably
// an equality predicate) is missing here -- confirm against the original.
5808 Ty.isScalar() ? LLT::integer(1) : Ty.changeElementType(LLT::integer(1)),
5809 RHS, One);
5810 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5811
// Remainder: LHS - (LHS / RHS) * RHS.
5812 if (Opcode == TargetOpcode::G_UREM) {
5813 auto Prod = MIB.buildMul(Ty, ret, RHS);
5814 return MIB.buildSub(Ty, LHS, Prod);
5815 }
5816 return ret;
5817}
5818
// Decides whether a G_UDIV/G_UREM should be rewritten using multiplies.
// Returns false for 1-bit element types, when the target reports integer
// division as cheap, for minsize functions, for non-constant divisors, and
// (when LI is set) when the operations checked below are neither legal nor
// pre-legalizer. Otherwise requires every divisor element to be a non-null
// constant.
// NOTE(review): the signature line (listing line 5819) is absent from this
// excerpt.
5820 unsigned Opcode = MI.getOpcode();
5821 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5822 Register Dst = MI.getOperand(0).getReg();
5823 Register RHS = MI.getOperand(2).getReg();
5824 LLT DstTy = MRI.getType(Dst);
5825
5826 auto &MF = *MI.getMF();
5827 AttributeList Attr = MF.getFunction().getAttributes();
5828 const auto &TLI = getTargetLowering();
5829 LLVMContext &Ctx = MF.getFunction().getContext();
5830 if (DstTy.getScalarSizeInBits() == 1 ||
5831 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5832 return false;
5833
5834 // Don't do this for minsize because the instruction sequence is usually
5835 // larger.
5836 if (MF.getFunction().hasMinSize())
5837 return false;
5838
// NOTE(review): listing line 5840 (the rest of this condition, presumably the
// IsExact flag test) is missing here -- confirm against the original file.
// This early return skips the legality checks further down.
5839 if (Opcode == TargetOpcode::G_UDIV &&
5841 return matchUnaryPredicate(
5842 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5843 }
5844
5845 auto *RHSDef = MRI.getVRegDef(RHS);
5846 if (!isConstantOrConstantVector(*RHSDef, MRI))
5847 return false;
5848
5849 // Don't do this if the types are not going to be legal.
5850 if (LI) {
5851 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5852 return false;
5853 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5854 return false;
// NOTE(review): listing line 5855 (the opening of the G_ICMP legality check)
// is missing here -- confirm against the original file.
5856 {TargetOpcode::G_ICMP,
5857 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5858 DstTy}}))
5859 return false;
// G_SUB is only needed for the remainder form.
5860 if (Opcode == TargetOpcode::G_UREM &&
5861 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5862 return false;
5863 }
5864
// Reject any divisor element that is missing or zero.
5865 return matchUnaryPredicate(
5866 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5867}
5868
// Builds the multiply-based replacement for MI and passes the new
// instruction's operand-0 register to replaceSingleDefInstWithReg.
// NOTE(review): the signature line (listing line 5869) is absent from this
// excerpt.
5870 auto *NewMI = buildUDivOrURemUsingMul(MI);
5871 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5872}
5873
// Decides whether a G_SDIV/G_SREM should be rewritten using multiplies.
// Returns false for element types narrower than 3 bits, when the target
// reports integer division as cheap, for minsize functions, for non-constant
// divisors, and (when LI is set) when the operations checked below are not
// available. Otherwise requires every divisor element to be a non-null
// constant.
// NOTE(review): the signature line (listing line 5874) is absent from this
// excerpt.
5875 unsigned Opcode = MI.getOpcode();
5876 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5877 Register Dst = MI.getOperand(0).getReg();
5878 Register RHS = MI.getOperand(2).getReg();
5879 LLT DstTy = MRI.getType(Dst);
5880 auto SizeInBits = DstTy.getScalarSizeInBits();
// Type with doubled element width, used for the widened G_MUL legality query
// below.
5881 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5882
5883 auto &MF = *MI.getMF();
5884 AttributeList Attr = MF.getFunction().getAttributes();
5885 const auto &TLI = getTargetLowering();
5886 LLVMContext &Ctx = MF.getFunction().getContext();
5887 if (DstTy.getScalarSizeInBits() < 3 ||
5888 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5889 return false;
5890
5891 // Don't do this for minsize because the instruction sequence is usually
5892 // larger.
5893 if (MF.getFunction().hasMinSize())
5894 return false;
5895
5896 // If the sdiv has an 'exact' flag we can use a simpler lowering.
// NOTE(review): listing line 5898 (the rest of this condition, presumably the
// IsExact flag test) is missing here -- confirm against the original file.
5897 if (Opcode == TargetOpcode::G_SDIV &&
5899 return matchUnaryPredicate(
5900 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5901 }
5902
5903 auto *RHSDef = MRI.getVRegDef(RHS);
5904 if (!isConstantOrConstantVector(*RHSDef, MRI))
5905 return false;
5906
5907 // Don't do this if the types are not going to be legal.
5908 if (LI) {
5909 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5910 return false;
// Accept either a legal G_SMULH on DstTy or a G_MUL on the double-width
// type that passes isLegalOrHasWidenScalar.
5911 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5912 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5913 return false;
// G_SUB is only needed for the remainder form.
5914 if (Opcode == TargetOpcode::G_SREM &&
5915 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5916 return false;
5917 }
5918
// Reject any divisor element that is missing or zero.
5919 return matchUnaryPredicate(
5920 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5921}
5922
// Builds the multiply-based replacement for MI and passes the new
// instruction's operand-0 register to replaceSingleDefInstWithReg.
// NOTE(review): the signature line (listing line 5923) is absent from this
// excerpt.
5924 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5925 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5926}
5927
// Expands a G_SDIV/G_SREM whose divisor is a constant (scalar or build
// vector) into shift/multiply sequences emitted through Builder, and returns
// the last instruction built.
//  - If MI carries the IsExact flag: optional exact ashr by the divisor's
//    trailing-zero count, then a multiply by the multiplicative inverse of the
//    remaining odd part.
//  - Otherwise: G_SMULH by a "magic" constant, add of LHS * {-1,0,1}, ashr,
//    and addition of the masked sign bit. G_SREM is finished as
//    LHS - Q * RHS.
// NOTE(review): the signature line (listing line 5928) is absent from this
// excerpt.
5929 unsigned Opcode = MI.getOpcode();
5930 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5931 Opcode == TargetOpcode::G_SREM);
5932 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5933 Register Dst = SDivorRem.getReg(0);
5934 Register LHS = SDivorRem.getReg(1);
5935 Register RHS = SDivorRem.getReg(2);
5936 LLT Ty = MRI.getType(Dst);
5937 LLT ScalarTy = Ty.getScalarType();
5938 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
// NOTE(review): listing line 5939 is missing here; it presumably declares
// ShiftAmtTy, which the next line reads -- confirm against the original file.
5940 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5941 auto &MIB = Builder;
5942
5943 bool UseSRA = false;
5944 SmallVector<Register, 16> ExactShifts, ExactFactors;
5945
5946 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5947 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5948
// Per-element callback for the exact path: records the shift amount and the
// inverse factor for one divisor element.
5949 auto BuildExactSDIVPattern = [&](const Constant *C) {
5950 // Don't recompute inverses for each splat element.
5951 if (IsSplat && !ExactFactors.empty()) {
5952 ExactShifts.push_back(ExactShifts[0]);
5953 ExactFactors.push_back(ExactFactors[0]);
5954 return true;
5955 }
5956
5957 auto *CI = cast<ConstantInt>(C);
5958 APInt Divisor = CI->getValue();
// Strip the trailing zero bits from the divisor with an arithmetic shift;
// they are applied as an arithmetic right shift of the dividend instead.
5959 unsigned Shift = Divisor.countr_zero();
5960 if (Shift) {
5961 Divisor.ashrInPlace(Shift);
5962 UseSRA = true;
5963 }
5964
5965 // Calculate the multiplicative inverse modulo BW.
5966 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5967 APInt Factor = Divisor.multiplicativeInverse();
5968 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5969 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5970 return true;
5971 };
5972
5973 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5974 // Collect all magic values from the build vector.
5975 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5976 (void)Matched;
5977 assert(Matched && "Expected unary predicate match to succeed");
5978
5979 Register Shift, Factor;
5980 if (Ty.isVector()) {
5981 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5982 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5983 } else {
5984 Shift = ExactShifts[0];
5985 Factor = ExactFactors[0];
5986 }
5987
5988 Register Res = LHS;
5989
// The shift is only emitted when at least one element had trailing zeros.
5990 if (UseSRA)
5991 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5992
5993 return MIB.buildMul(Ty, Res, Factor);
5994 }
5995
5996 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5997
// Per-element callback for the general path: records the magic multiplier,
// the numerator factor (-1, 0 or 1), the shift amount and the sign-bit mask
// for one divisor element.
5998 auto BuildSDIVPattern = [&](const Constant *C) {
5999 auto *CI = cast<ConstantInt>(C);
6000 const APInt &Divisor = CI->getValue();
6001
// NOTE(review): listing lines 6002-6003 are missing here; they presumably
// declare `Magics`, which is read and written below -- confirm against the
// original file.
6004 int NumeratorFactor = 0;
6005 int ShiftMask = -1;
6006
6007 if (Divisor.isOne() || Divisor.isAllOnes()) {
6008 // If d is +1/-1, we just multiply the numerator by +1/-1.
6009 NumeratorFactor = Divisor.getSExtValue();
6010 Magics.Magic = 0;
6011 Magics.ShiftAmount = 0;
6012 ShiftMask = 0;
6013 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
6014 // If d > 0 and m < 0, add the numerator.
6015 NumeratorFactor = 1;
6016 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
6017 // If d < 0 and m > 0, subtract the numerator.
6018 NumeratorFactor = -1;
6019 }
6020
6021 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
6022 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
6023 Shifts.push_back(
6024 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
6025 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
6026
6027 return true;
6028 };
6029
6030 // Collect the shifts/magic values from each element.
6031 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
6032 (void)Matched;
6033 assert(Matched && "Expected unary predicate match to succeed");
6034
6035 Register MagicFactor, Factor, Shift, ShiftMask;
6036 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
6037 if (RHSDef) {
6038 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
6039 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
6040 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
6041 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
6042 } else {
6043 assert(MRI.getType(RHS).isScalar() &&
6044 "Non-build_vector operation should have been a scalar");
6045 MagicFactor = MagicFactors[0];
6046 Factor = Factors[0];
6047 Shift = Shifts[0];
6048 ShiftMask = ShiftMasks[0];
6049 }
6050
// The initial value of Q is overwritten immediately by the G_SMULH result.
6051 Register Q = LHS;
6052 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
6053
6054 // (Optionally) Add/subtract the numerator using Factor.
6055 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
6056 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
6057
6058 // Shift right algebraic by shift value.
6059 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
6060
6061 // Extract the sign bit, mask it and add it to the quotient.
6062 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
6063 auto T = MIB.buildLShr(Ty, Q, SignShift);
6064 T = MIB.buildAnd(Ty, T, ShiftMask);
6065 auto ret = MIB.buildAdd(Ty, Q, T);
6066
// Remainder: LHS - (LHS / RHS) * RHS.
6067 if (Opcode == TargetOpcode::G_SREM) {
6068 auto Prod = MIB.buildMul(Ty, ret, RHS);
6069 return MIB.buildSub(Ty, LHS, Prod);
6070 }
6071 return ret;
6072}
6073
// Matches a G_SDIV/G_UDIV whose divisor elements are all ConstantInt powers
// of two. When IsSigned is set, negated powers of two are accepted as well.
// Undef elements are rejected (AllowUndefs=false).
// NOTE(review): the signature line (listing line 6074), which declares
// IsSigned, is absent from this excerpt.
6075 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
6076 MI.getOpcode() == TargetOpcode::G_UDIV) &&
6077 "Expected SDIV or UDIV");
6078 auto &Div = cast<GenericMachineInstr>(MI);
6079 Register RHS = Div.getReg(2);
6080 auto MatchPow2 = [&](const Constant *C) {
6081 auto *CI = dyn_cast<ConstantInt>(C);
6082 return CI && (CI->getValue().isPowerOf2() ||
6083 (IsSigned && CI->getValue().isNegatedPowerOf2()));
6084 };
6085 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
6086}
6087
// Replaces a G_SDIV by a (possibly negated) power-of-two divisor with the
// shift/add/select sequence sketched in the comment below, then erases MI.
// NOTE(review): the signature line (listing line 6088) is absent from this
// excerpt.
6089 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
6090 auto &SDiv = cast<GenericMachineInstr>(MI);
6091 Register Dst = SDiv.getReg(0);
6092 Register LHS = SDiv.getReg(1);
6093 Register RHS = SDiv.getReg(2);
6094 LLT Ty = MRI.getType(Dst);
// NOTE(review): listing line 6095 is missing here; it presumably declares
// ShiftAmtTy, which is used below -- confirm against the original file.
// CCVT is the 1-bit condition type matching Ty's vector shape.
6096 LLT CCVT = Ty.isVector() ? LLT::vector(Ty.getElementCount(), LLT::integer(1))
6097 : LLT::integer(1);
6098
6099 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
6100 // to the following version:
6101 //
6102 // %c1 = G_CTTZ %rhs
6103 // %inexact = G_SUB $bitwidth, %c1
6104 // %sign = G_ASHR %lhs, $(bitwidth - 1)
6105 // %lshr = G_LSHR %sign, %inexact
6106 // %add = G_ADD %lhs, %lshr
6107 // %ashr = G_ASHR %add, %c1
6108 // %ashr = G_SELECT, %isoneorallones, %lhs, %ashr
6109 // %zero = G_CONSTANT $0
6110 // %neg = G_NEG %ashr
6111 // %isneg = G_ICMP SLT %rhs, %zero
6112 // %res = G_SELECT %isneg, %neg, %ashr
6113
6114 unsigned BitWidth = Ty.getScalarSizeInBits();
6115 auto Zero = Builder.buildConstant(Ty, 0);
6116
6117 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
6118 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6119 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
6120 // Splat the sign bit into the register
6121 auto Sign = Builder.buildAShr(
6122 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
6123
6124 // Add (LHS < 0) ? abs2 - 1 : 0;
6125 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
6126 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
6127 auto AShr = Builder.buildAShr(Ty, Add, C1);
6128
6129 // Special case: (sdiv X, 1) -> X
6130 // Special Case: (sdiv X, -1) -> 0-X
// Both cases select LHS here; the -1 case is negated by the final select
// below because RHS is negative.
6131 auto One = Builder.buildConstant(Ty, 1);
6132 auto MinusOne = Builder.buildConstant(Ty, -1);
6133 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
6134 auto IsMinusOne =
6135 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
6136 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
6137 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
6138
6139 // If divided by a positive value, we're done. Otherwise, the result must be
6140 // negated.
6141 auto Neg = Builder.buildNeg(Ty, AShr);
6142 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
6143 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
6144 MI.eraseFromParent();
6145}
6146
// Replaces a G_UDIV by a power-of-two divisor with a logical right shift of
// LHS by G_CTTZ(RHS), written to MI's destination register, then erases MI.
// NOTE(review): the signature line (listing line 6147) is absent from this
// excerpt.
6148 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
6149 auto &UDiv = cast<GenericMachineInstr>(MI);
6150 Register Dst = UDiv.getReg(0);
6151 Register LHS = UDiv.getReg(1);
6152 Register RHS = UDiv.getReg(2);
6153 LLT Ty = MRI.getType(Dst);
// NOTE(review): listing line 6154 is missing here; it presumably declares
// ShiftAmtTy, which is used below -- confirm against the original file.
6155
6156 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6157 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
6158 MI.eraseFromParent();
6159}
6160
// Replaces a G_SREM with the bias-and-mask sequence described in the comment
// below, writing the result to Dst, then erases MI.
// NOTE(review): the signature line (listing line 6161) is absent from this
// excerpt.
6162 assert(MI.getOpcode() == TargetOpcode::G_SREM && "Expected SREM");
6163 auto &SRem = cast<GBinOp>(MI);
6164 Register Dst = SRem.getReg(0);
6165 Register LHS = SRem.getLHSReg();
6166 Register RHS = SRem.getRHSReg();
6167 LLT Ty = MRI.getType(Dst);
// NOTE(review): listing line 6168 is missing here; it presumably declares
// ShiftAmtTy, which is used below -- confirm against the original file.
6169
6170 // Effectively we want to lower G_SREM %lhs, %rhs, where %rhs is +/- a power
6171 // of 2, to the following branch-free bias-and-mask version:
6172 //
6173 // %abs = G_ABS %rhs
6174 // %mask = G_SUB %abs, 1
6175 // %sign = G_ASHR %lhs, $(bitwidth - 1)
6176 // %bias = G_AND %sign, %mask
6177 // %biased = G_ADD %lhs, %bias
6178 // %masked = G_AND %biased, %mask
6179 // %res = G_SUB %masked, %bias
6180 //
6181 // The bias adds (|%rhs| - 1) for negative %lhs, correcting rounding towards
6182 // zero (instead of towards -inf that a plain mask would give). Constant
6183 // divisors collapse %mask to a single G_CONSTANT via the CSEMIRBuilder folds
6184 // for G_ABS and G_SUB.
6185
6186 unsigned BitWidth = Ty.getScalarSizeInBits();
6187 auto AbsRHS = Builder.buildAbs(Ty, RHS);
6188 auto Mask = Builder.buildSub(Ty, AbsRHS, Builder.buildConstant(Ty, 1));
6189 auto BWMinusOne = Builder.buildConstant(ShiftAmtTy, BitWidth - 1);
// Sign is the arithmetic shift of LHS by (bitwidth - 1), i.e. its sign bit
// replicated across the element.
6190 auto Sign = Builder.buildAShr(Ty, LHS, BWMinusOne);
6191 auto Bias = Builder.buildAnd(Ty, Sign, Mask);
6192 auto Biased = Builder.buildAdd(Ty, LHS, Bias);
6193 auto Masked = Builder.buildAnd(Ty, Biased, Mask);
6194 Builder.buildSub(Dst, Masked, Bias);
6195 MI.eraseFromParent();
6196}
6197
// Matches a G_UMULH whose second source operand consists only of ConstantInt
// powers of two other than 1 (undef elements not allowed), provided the
// G_LSHR and G_CTLZ queries below are legal or we are before the legalizer.
// NOTE(review): the signature line (listing line 6198) is absent from this
// excerpt.
6199 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
6200 Register RHS = MI.getOperand(2).getReg();
6201 Register Dst = MI.getOperand(0).getReg();
6202 LLT Ty = MRI.getType(Dst);
6203 LLT RHSTy = MRI.getType(RHS);
// NOTE(review): listing line 6204 is missing here; it presumably declares
// ShiftAmtTy, which is used below -- confirm against the original file.
6205 auto MatchPow2ExceptOne = [&](const Constant *C) {
6206 if (auto *CI = dyn_cast<ConstantInt>(C))
6207 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
6208 return false;
6209 };
6210 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
6211 return false;
6212 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
6213 // get log base 2, and it is not always legal on a target.
6214 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
6215 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
6216}
6217
// Replaces MI with a logical right shift of operand 1 by
// (element bit width - log2(operand 2)), written to MI's destination, then
// erases MI.
// NOTE(review): the signature line (listing line 6218) is absent from this
// excerpt.
6219 Register LHS = MI.getOperand(1).getReg();
6220 Register RHS = MI.getOperand(2).getReg();
6221 Register Dst = MI.getOperand(0).getReg();
6222 LLT Ty = MRI.getType(Dst);
// NOTE(review): listing line 6223 is missing here; it presumably declares
// ShiftAmtTy, which is used below -- confirm against the original file.
6224 unsigned NumEltBits = Ty.getScalarSizeInBits();
6225
6226 auto LogBase2 = buildLogBase2(RHS, Builder);
6227 auto ShiftAmt =
6228 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
// The shift amount is computed in Ty and then converted to ShiftAmtTy.
6229 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
6230 Builder.buildLShr(Dst, LHS, Trunc);
6231 MI.eraseFromParent();
6232}
6233
// Matches a truncate whose source is a signed clamp to the destination
// type's signed range, i.e. smin(smax(x, SignedMin), SignedMax) or
// smax(smin(x, SignedMax), SignedMin). On a match, MatchInfo is bound to x.
// NOTE(review): the first signature line (listing line 6234) is absent from
// this excerpt.
6235 Register &MatchInfo) const {
6236 Register Dst = MI.getOperand(0).getReg();
6237 Register Src = MI.getOperand(1).getReg();
6238 LLT DstTy = MRI.getType(Dst);
6239 LLT SrcTy = MRI.getType(Src);
6240 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6241 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6242 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6243
// NOTE(review): listing line 6244 (the opening of this legality check) is
// missing here -- confirm against the original file.
6245 {TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6246 return false;
6247
// Destination-width signed bounds, sign-extended to the source width so they
// can be compared against the clamp constants.
6248 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6249 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
6250 return mi_match(Src, MRI,
6251 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6252 m_SpecificICstOrSplat(SignedMin)),
6253 m_SpecificICstOrSplat(SignedMax))) ||
6254 mi_match(Src, MRI,
6255 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6256 m_SpecificICstOrSplat(SignedMax)),
6257 m_SpecificICstOrSplat(SignedMin)));
6258}
6259
// Emits buildTruncSSatS from MatchInfo into MI's destination register and
// erases MI.
// NOTE(review): the first signature line (listing line 6260) is absent from
// this excerpt.
6261 Register &MatchInfo) const {
6262 Register Dst = MI.getOperand(0).getReg();
6263 Builder.buildTruncSSatS(Dst, MatchInfo);
6264 MI.eraseFromParent();
6265}
6266
// Matches a truncate whose source clamps a value against the destination
// type's unsigned maximum (zero-extended to the source width). Three
// patterns are tried; on a match, MatchInfo is bound to the clamped value.
// NOTE(review): the first signature line (listing line 6267) is absent from
// this excerpt, as are parts of the first and third patterns.
6268 Register &MatchInfo) const {
6269 Register Dst = MI.getOperand(0).getReg();
6270 Register Src = MI.getOperand(1).getReg();
6271 LLT DstTy = MRI.getType(Dst);
6272 LLT SrcTy = MRI.getType(Src);
6273 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6274 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6275 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6276
// NOTE(review): listing line 6277 (the opening of this legality check) is
// missing here -- confirm against the original file.
6278 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6279 return false;
6280 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6281 return mi_match(Src, MRI,
// NOTE(review): listing line 6282 (the inner part of the first pattern) is
// missing here -- confirm against the original file.
6283 m_SpecificICstOrSplat(UnsignedMax))) ||
// Second pattern: smax(smin(x, UnsignedMax), 0).
6284 mi_match(Src, MRI,
6285 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6286 m_SpecificICstOrSplat(UnsignedMax)),
6287 m_SpecificICstOrSplat(0))) ||
6288 mi_match(Src, MRI,
// NOTE(review): listing line 6289 (the inner part of the third pattern) is
// missing here -- confirm against the original file.
6290 m_SpecificICstOrSplat(UnsignedMax)));
6291}
6292
// Emits buildTruncSSatU from MatchInfo into MI's destination register and
// erases MI.
// NOTE(review): the first signature line (listing line 6293) is absent from
// this excerpt.
6294 Register &MatchInfo) const {
6295 Register Dst = MI.getOperand(0).getReg();
6296 Builder.buildTruncSSatU(Dst, MatchInfo);
6297 MI.eraseFromParent();
6298}
6299
// Matches when operand 2 of MinMI is the destination type's unsigned maximum
// (zero-extended to the source width, constant or splat) and operand 1 of
// MinMI is not itself defined by a G_SMAX.
// NOTE(review): the first signature line (listing line 6300) is absent from
// this excerpt.
6301 MachineInstr &MinMI) const {
6302 Register Min = MinMI.getOperand(2).getReg();
6303 Register Val = MinMI.getOperand(1).getReg();
6304 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6305 LLT SrcTy = MRI.getType(Val);
6306 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6307 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6308 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6309
// NOTE(review): listing line 6310 (the opening of this legality check) is
// missing here; the opcode queried on the next line is G_TRUNC_SSAT_U --
// confirm against the original file that this is the intended opcode.
6311 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6312 return false;
6313 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6314 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6315 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6316}
6317
// Returns true only when LI is set and G_FPTOUI_SAT from the type of SrcMI's
// operand 1 to the type of MI's operand 0 is legal or we are before the
// legalizer.
// NOTE(review): the first signature line (listing line 6318) is absent from
// this excerpt.
6319 MachineInstr &SrcMI) const {
6320 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6321 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6322
6323 return LI &&
6324 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6325}
6326
// Matches floating-point operations whose G_FNEG operands can be removed,
// either by flipping G_FADD <-> G_FSUB or by cancelling a pair of negations
// on the first two operands of G_FMUL/G_FDIV/G_FMAD/G_FMA. On success,
// MatchInfo is set to a callback that rewrites MI in place.
// NOTE(review): the first signature line (listing line 6327) is absent from
// this excerpt.
6328 BuildFnTy &MatchInfo) const {
6329 unsigned Opc = MI.getOpcode();
6330 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6331 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6332 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6333
6334 Register Dst = MI.getOperand(0).getReg();
6335 Register X = MI.getOperand(1).getReg();
6336 Register Y = MI.getOperand(2).getReg();
6337 LLT Type = MRI.getType(Dst);
6338
6339 // fold (fadd x, fneg(y)) -> (fsub x, y)
6340 // fold (fadd fneg(y), x) -> (fsub x, y)
6341 // G_FADD is commutative so both cases are checked by m_GFAdd
6342 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6343 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6344 Opc = TargetOpcode::G_FSUB;
6345 }
6346 // fold (fsub x, fneg(y)) -> (fadd x, y)
6347 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6348 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6349 Opc = TargetOpcode::G_FADD;
6350 }
6351 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6352 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6353 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6354 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
// The two mi_match calls rebind X and Y to the sources of the G_FNEGs.
6355 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6356 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6357 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6358 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6359 // no opcode change
6360 } else
6361 return false;
6362
// The callback mutates MI in place (new opcode, operands 1 and 2 replaced by
// X and Y), bracketed by Observer change notifications. It captures MI by
// reference and everything else by copy.
6363 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6364 Observer.changingInstr(MI);
6365 MI.setDesc(B.getTII().get(Opc));
6366 MI.getOperand(1).setReg(X);
6367 MI.getOperand(2).setReg(Y);
6368 Observer.changedInstr(MI);
6369 };
6370 return true;
6371}
6372
// Matches a G_FSUB whose first source operand is a floating-point zero
// constant (or splat, for vectors): -0.0 always matches, +0.0 only when MI
// carries the FmNsz flag. MatchInfo is assigned the second source register
// before any check is made, so it is written even when the match fails.
// NOTE(review): the first signature line (listing line 6373) is absent from
// this excerpt.
6374 Register &MatchInfo) const {
6375 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6376
6377 Register LHS = MI.getOperand(1).getReg();
6378 MatchInfo = MI.getOperand(2).getReg();
6379 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6380
// NOTE(review): listing line 6383 (the scalar arm of this conditional) is
// missing below -- confirm against the original file.
6381 const auto LHSCst = Ty.isVector()
6382 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6384 if (!LHSCst)
6385 return false;
6386
6387 // -0.0 is always allowed
6388 if (LHSCst->Value.isNegZero())
6389 return true;
6390
6391 // +0.0 is only allowed if nsz is set.
6392 if (LHSCst->Value.isPosZero())
6393 return MI.getFlag(MachineInstr::FmNsz);
6394
6395 return false;
6396}
6397
// Writes fneg(fcanonicalize(MatchInfo)) to MI's destination register and
// erases MI via eraseInst.
// NOTE(review): the first signature line (listing line 6398) is absent from
// this excerpt.
6399 Register &MatchInfo) const {
6400 Register Dst = MI.getOperand(0).getReg();
6401 Builder.buildFNeg(
6402 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6403 eraseInst(MI);
6404}
6405
6406/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6407/// due to global flags or MachineInstr flags.
6408static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6409 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6410 return false;
6411 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6412}
6413
6414static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6415 const MachineRegisterInfo &MRI) {
6416 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6417 MRI.use_instr_nodbg_end()) >
6418 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6419 MRI.use_instr_nodbg_end());
6420}
6421
// Shared precondition check for the FMA/FMAD formation combines. Returns
// false when no fused opcode is usable or MI is not contractable; otherwise
// fills the output flags:
//   AllowFusionGlobally - the target options request fast FP-op fusion, or
//                         FMAD is available.
//   HasFMAD             - not pre-legalize and TLI.isFMADLegal holds.
//   Aggressive          - TLI.enableAggressiveFMAFusion(DstType).
// When CanReassociate is true, MI must also carry the FmReassoc flag.
// The outputs may be left partially written on a false return.
// NOTE(review): the first signature line (listing line 6422) is absent from
// this excerpt.
6423 bool &AllowFusionGlobally,
6424 bool &HasFMAD, bool &Aggressive,
6425 bool CanReassociate) const {
6426
6427 auto *MF = MI.getMF();
6428 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6429 const TargetOptions &Options = MF->getTarget().Options;
6430 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6431
6432 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6433 return false;
6434
6435 // Floating-point multiply-add with intermediate rounding.
6436 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6437 // Floating-point multiply-add without intermediate rounding.
6438 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6439 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6440 // No valid opcode, do not combine.
6441 if (!HasFMAD && !HasFMA)
6442 return false;
6443
6444 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6445 // If the addition is not contractable, do not combine.
6446 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6447 return false;
6448
6449 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6450 return true;
6451}
6452
// Matches a G_FADD with a contractable G_FMUL operand and sets MatchInfo to a
// callback that builds the fused multiply-add (G_FMAD when HasFMAD, else
// G_FMA) into MI's destination register. Unless Aggressive is set, the
// multiply's result must have exactly one non-debug use.
// NOTE(review): the first signature lines (listing lines 6453-6454) are
// absent from this excerpt.
6455 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6456 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6457
6458 bool AllowFusionGlobally, HasFMAD, Aggressive;
6459 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6460 return false;
6461
6462 Register Op1 = MI.getOperand(1).getReg();
6463 Register Op2 = MI.getOperand(2).getReg();
6464 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6465 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6466 unsigned PreferredFusedOpcode =
6467 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6468
6469 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6470 // prefer to fold the multiply with fewer uses.
6471 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6472 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6473 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6474 std::swap(LHS, RHS);
6475 }
6476
6477 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6478 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6479 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
// The callback captures MI by reference and LHS/RHS by copy.
6480 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6481 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6482 {LHS.MI->getOperand(1).getReg(),
6483 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6484 };
6485 return true;
6486 }
6487
6488 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6489 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6490 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6491 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6492 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6493 {RHS.MI->getOperand(1).getReg(),
6494 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6495 };
6496 return true;
6497 }
6498
6499 return false;
6500}
6501
// Matches a G_FADD where one operand is a G_FPEXT of a contractable G_FMUL
// and the target reports the extension as foldable into the fused opcode.
// MatchInfo is set to a callback that extends both multiply operands to the
// destination type and builds the fused multiply-add (G_FMAD when HasFMAD,
// else G_FMA) into MI's destination register.
// NOTE(review): the first signature lines (listing lines 6502-6503) are
// absent from this excerpt.
6504 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6505 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6506
6507 bool AllowFusionGlobally, HasFMAD, Aggressive;
6508 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6509 return false;
6510
6511 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6512 Register Op1 = MI.getOperand(1).getReg();
6513 Register Op2 = MI.getOperand(2).getReg();
6514 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6515 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6516 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6517
6518 unsigned PreferredFusedOpcode =
6519 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6520
6521 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6522 // prefer to fold the multiply with fewer uses.
6523 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6524 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6525 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6526 std::swap(LHS, RHS);
6527 }
6528
6529 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
// FpExtSrc is bound by m_MInstr to the instruction feeding the G_FPEXT.
6530 MachineInstr *FpExtSrc;
6531 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6532 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6533 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6534 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6535 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6536 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6537 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6538 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6539 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6540 };
6541 return true;
6542 }
6543
6544 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6545 // Note: Commutes FADD operands.
6546 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6547 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6548 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6549 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6550 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6551 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6552 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6553 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6554 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6555 };
6556 return true;
6557 }
6558
6559 return false;
6560}
6561
6564 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match a G_FADD in which one operand is defined by the preferred fused
// multiply-add opcode and that instruction's addend (operand 3) is defined by
// a G_FMUL, i.e. (fadd (fma x, y, (fmul u, v)), z) or the commuted form.
// On success MatchInfo is set to a callback that emits
// (fma x, y, (fma u, v, z)) into MI's destination register and true is
// returned; otherwise false is returned.
6565 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6566
// These three flags are read below without any other initialisation, so they
// are presumably out-parameters filled in by canCombineFMadOrFMA.
6567 bool AllowFusionGlobally, HasFMAD, Aggressive;
6568 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6569 return false;
6570
// Pair each G_FADD source register with the instruction that defines it.
6571 Register Op1 = MI.getOperand(1).getReg();
6572 Register Op2 = MI.getOperand(2).getReg();
6573 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6574 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6575 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6576
// G_FMAD is used when HasFMAD is set, G_FMA otherwise.
6577 unsigned PreferredFusedOpcode =
6578 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6579
6580 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6581 // prefer to fold the multiply with fewer uses.
6582 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6583 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6584 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6585 std::swap(LHS, RHS);
6586 }
6587
// FMA stays null unless one of the two patterns below matches; Z is the
// G_FADD operand that is not the fused instruction.
6588 MachineInstr *FMA = nullptr;
6589 Register Z;
6590 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
// Both the fused instruction's result and its G_FMUL addend must have exactly
// one non-debug use.
6591 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6592 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6593 TargetOpcode::G_FMUL) &&
6594 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6595 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6596 FMA = LHS.MI;
6597 Z = RHS.Reg;
6598 }
6599 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6600 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6601 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6602 TargetOpcode::G_FMUL) &&
6603 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6604 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6605 Z = LHS.Reg;
6606 FMA = RHS.MI;
6607 }
6608
6609 if (FMA) {
// Read all registers now so the callback captures plain register values.
6610 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6611 Register X = FMA->getOperand(1).getReg();
6612 Register Y = FMA->getOperand(2).getReg();
6613 Register U = FMulMI->getOperand(1).getReg();
6614 Register V = FMulMI->getOperand(2).getReg();
6615
6616 MatchInfo = [=, &MI](MachineIRBuilder &B) {
// Inner op computes u * v + z; outer op computes x * y + inner.
6617 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6618 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6619 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6620 {X, Y, InnerFMA});
6621 };
6622 return true;
6623 }
6624
6625 return false;
6626}
6627
6630 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match a G_FADD where one operand is a fused multiply-add combined with a
// G_FPEXT and a contractable G_FMUL (four shapes, listed at each pattern
// below). On success MatchInfo is set to a callback building two nested fused
// ops on extended operands and true is returned. Only runs when
// canCombineFMadOrFMA reports Aggressive.
6631 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6632
6633 bool AllowFusionGlobally, HasFMAD, Aggressive;
6634 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6635 return false;
6636
6637 if (!Aggressive)
6638 return false;
6639
6640 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6641 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6642 Register Op1 = MI.getOperand(1).getReg();
6643 Register Op2 = MI.getOperand(2).getReg();
6644 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6645 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6646
6647 unsigned PreferredFusedOpcode =
6648 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6649
6650 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6651 // prefer to fold the multiply with fewer uses.
// Aggressive is always true at this point because of the early return above.
6652 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6653 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6654 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6655 std::swap(LHS, RHS);
6656 }
6657
6658 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
// U and V are extended to DstType here; X, Y and Z are used as given, so
// callers must pass them already in DstType.
6659 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6661 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6662 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6663 Register InnerFMA =
6664 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6665 .getReg(0);
6666 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6667 {X, Y, InnerFMA});
6668 };
6669
6670 MachineInstr *FMulMI, *FMAMI;
6671 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6672 // -> (fma x, y, (fma (fpext u), (fpext v), z))
// The target is asked whether an extension from the G_FMUL's result type to
// DstType can be folded into the fused opcode.
6673 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6674 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6675 m_GFPExt(m_MInstr(FMulMI))) &&
6676 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6677 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6678 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6679 MatchInfo = [=](MachineIRBuilder &B) {
6680 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6681 FMulMI->getOperand(2).getReg(), RHS.Reg,
6682 LHS.MI->getOperand(1).getReg(),
6683 LHS.MI->getOperand(2).getReg(), B);
6684 };
6685 return true;
6686 }
6687
6688 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6689 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6690 // FIXME: This turns two single-precision and one double-precision
6691 // operation into two double-precision operations, which might not be
6692 // interesting for all targets, especially GPUs.
6693 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6694 FMAMI->getOpcode() == PreferredFusedOpcode) {
// This FMulMI shadows the outer declaration; it is the definition of the
// fused instruction's addend.
6695 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6696 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6697 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6698 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6699 MatchInfo = [=](MachineIRBuilder &B) {
// x and y come from the narrower fused op, so extend them before reuse.
6700 Register X = FMAMI->getOperand(1).getReg();
6701 Register Y = FMAMI->getOperand(2).getReg();
6702 X = B.buildFPExt(DstType, X).getReg(0);
6703 Y = B.buildFPExt(DstType, Y).getReg(0);
6704 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6705 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6706 };
6707
6708 return true;
6709 }
6710 }
6711
6712 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
6713 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6714 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6715 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6716 m_GFPExt(m_MInstr(FMulMI))) &&
6717 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6718 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6719 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6720 MatchInfo = [=](MachineIRBuilder &B) {
6721 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6722 FMulMI->getOperand(2).getReg(), LHS.Reg,
6723 RHS.MI->getOperand(1).getReg(),
6724 RHS.MI->getOperand(2).getReg(), B);
6725 };
6726 return true;
6727 }
6728
6729 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
6730 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6731 // FIXME: This turns two single-precision and one double-precision
6732 // operation into two double-precision operations, which might not be
6733 // interesting for all targets, especially GPUs.
6734 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6735 FMAMI->getOpcode() == PreferredFusedOpcode) {
6736 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6737 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6738 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6739 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6740 MatchInfo = [=](MachineIRBuilder &B) {
6741 Register X = FMAMI->getOperand(1).getReg();
6742 Register Y = FMAMI->getOperand(2).getReg();
6743 X = B.buildFPExt(DstType, X).getReg(0);
6744 Y = B.buildFPExt(DstType, Y).getReg(0);
6745 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6746 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6747 };
6748 return true;
6749 }
6750 }
6751
6752 return false;
6753}
6754
6757 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match a G_FSUB with a contractable G_FMUL as either operand and set
// MatchInfo to a callback that emits a single fused multiply-add plus one
// G_FNEG. Returns true on a match, false otherwise.
6758 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6759
6760 bool AllowFusionGlobally, HasFMAD, Aggressive;
6761 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6762 return false;
6763
6764 Register Op1 = MI.getOperand(1).getReg();
6765 Register Op2 = MI.getOperand(2).getReg();
6766 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6767 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6768 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6769
6770 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
6771 // prefer to fold the multiply with fewer uses.
// NOTE(review): declared as int but only ever holds true/false.
6772 int FirstMulHasFewerUses = true;
6773 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6774 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6775 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6776 FirstMulHasFewerUses = false;
6777
6778 unsigned PreferredFusedOpcode =
6779 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6780
6781 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
// Unless Aggressive, the multiply result must have a single non-debug use.
6782 if (FirstMulHasFewerUses &&
6783 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6784 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6785 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6786 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6787 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6788 {LHS.MI->getOperand(1).getReg(),
6789 LHS.MI->getOperand(2).getReg(), NegZ});
6790 };
6791 return true;
6792 }
6793 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6794 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6795 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6796 MatchInfo = [=, &MI](MachineIRBuilder &B) {
// Only the first multiplicand is negated; the minuend becomes the addend.
6797 Register NegY =
6798 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6799 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6800 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6801 };
6802 return true;
6803 }
6804
6805 return false;
6806}
6807
6810 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match a G_FSUB where one operand is the G_FNEG of a contractable G_FMUL and
// set MatchInfo to a callback that emits a fused multiply-add instead.
// Returns true on a match, false otherwise.
6811 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6812
6813 bool AllowFusionGlobally, HasFMAD, Aggressive;
6814 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6815 return false;
6816
6817 Register LHSReg = MI.getOperand(1).getReg();
6818 Register RHSReg = MI.getOperand(2).getReg();
6819 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6820
6821 unsigned PreferredFusedOpcode =
6822 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6823
6824 MachineInstr *FMulMI;
6825 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
// Unless Aggressive, both the G_FNEG result and the G_FMUL result must have
// a single non-debug use.
6826 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6827 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6828 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6829 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6830 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6831 Register NegX =
6832 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6833 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6834 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6835 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6836 };
6837 return true;
6838 }
6839
6840 // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
// Subtracting a negated product is adding the product, so no G_FNEG is
// emitted here.
6841 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6842 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6843 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6844 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6845 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6846 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6847 {FMulMI->getOperand(1).getReg(),
6848 FMulMI->getOperand(2).getReg(), LHSReg});
6849 };
6850 return true;
6851 }
6852
6853 return false;
6854}
6855
6858 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match a G_FSUB where one operand is the G_FPEXT of a contractable G_FMUL and
// set MatchInfo to a callback that extends the multiplicands and emits a fused
// multiply-add. Returns true on a match, false otherwise.
// NOTE(review): unlike the neighbouring fpext combines, no
// TLI.isFPExtFoldable query is made here -- confirm this is intended.
6859 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6860
6861 bool AllowFusionGlobally, HasFMAD, Aggressive;
6862 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6863 return false;
6864
6865 Register LHSReg = MI.getOperand(1).getReg();
6866 Register RHSReg = MI.getOperand(2).getReg();
6867 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6868
6869 unsigned PreferredFusedOpcode =
6870 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6871
6872 MachineInstr *FMulMI;
6873 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
// Unless Aggressive, the G_FPEXT result must have a single non-debug use.
6874 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6875 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6876 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6877 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6878 Register FpExtX =
6879 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6880 Register FpExtY =
6881 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6882 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6883 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6884 {FpExtX, FpExtY, NegZ});
6885 };
6886 return true;
6887 }
6888
6889 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6890 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6891 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6892 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6893 MatchInfo = [=, &MI](MachineIRBuilder &B) {
// The negation is applied after extending y, i.e. in the wide type.
6894 Register FpExtY =
6895 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6896 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6897 Register FpExtZ =
6898 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6899 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6900 {NegY, FpExtZ, LHSReg});
6901 };
6902 return true;
6903 }
6904
6905 return false;
6906}
6907
6910 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match a G_FSUB where one operand is a G_FMUL wrapped in both a G_FPEXT and a
// G_FNEG (in either nesting order) and set MatchInfo to a callback that emits
// a fused multiply-add on the extended multiplicands. Returns true on a match,
// false otherwise.
6911 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6912
6913 bool AllowFusionGlobally, HasFMAD, Aggressive;
6914 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6915 return false;
6916
6917 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6918 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6919 Register LHSReg = MI.getOperand(1).getReg();
6920 Register RHSReg = MI.getOperand(2).getReg();
6921
6922 unsigned PreferredFusedOpcode =
6923 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6924
// Shared emitter: Dst = fused(fpext X, fpext Y, Z). Z is used unextended.
6925 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6927 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6928 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6929 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6930 };
6931
6932 MachineInstr *FMulMI;
6933 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6934 // (fneg (fma (fpext x), (fpext y), z))
6935 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6936 // (fneg (fma (fpext x), (fpext y), z))
6937 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6938 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6939 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6940 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6941 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6942 MatchInfo = [=, &MI](MachineIRBuilder &B) {
// The fused result goes to a fresh register and is then negated into MI's
// destination.
6943 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6944 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6945 FMulMI->getOperand(2).getReg(), RHSReg, B);
6946 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6947 };
6948 return true;
6949 }
6950
6951 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6952 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6953 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6954 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6955 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6956 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6957 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6958 MatchInfo = [=, &MI](MachineIRBuilder &B) {
// Here the fused op writes MI's destination directly; no G_FNEG is needed.
6959 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6960 FMulMI->getOperand(2).getReg(), LHSReg, B);
6961 };
6962 return true;
6963 }
6964
6965 return false;
6966}
6967
6969 unsigned &IdxToPropagate) const {
// Match a floating-point min/max instruction that has a constant NaN as source
// operand 1 or 2. On success IdxToPropagate holds the index of the operand the
// instruction can be replaced with and true is returned. Any other opcode, or
// no constant NaN operand, returns false.
6970 bool PropagateNaN;
6971 switch (MI.getOpcode()) {
6972 default:
6973 return false;
// G_FMINNUM / G_FMAXNUM: the non-NaN operand is selected.
6974 case TargetOpcode::G_FMINNUM:
6975 case TargetOpcode::G_FMAXNUM:
6976 PropagateNaN = false;
6977 break;
// G_FMINIMUM / G_FMAXIMUM: the NaN operand itself is selected.
6978 case TargetOpcode::G_FMINIMUM:
6979 case TargetOpcode::G_FMAXIMUM:
6980 PropagateNaN = true;
6981 break;
6982 }
6983
// Tests operand Idx for a constant NaN and, if found, records which operand
// to forward: Idx itself when propagating NaN, otherwise the other source
// operand (1 <-> 2).
6984 auto MatchNaN = [&](unsigned Idx) {
6985 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6986 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6987 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6988 return false;
6989 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6990 return true;
6991 };
6992
// Operand 1 is checked first; operand 2 is only examined if that fails.
6993 return MatchNaN(1) || MatchNaN(2);
6994}
6995
6996// Combine multiple FDIVs with the same divisor into multiple FMULs by the
6997// reciprocal.
6998// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
7000 MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
// On success MatchInfo holds MI plus every other eligible G_FDIV by the same
// divisor in MI's basic block, with a dominating instruction kept at index 0.
// Returns true only when the number of collected divisions reaches the
// target's threshold.
7001 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
7002
7003 Register X = MI.getOperand(1).getReg();
7004 Register Y = MI.getOperand(2).getReg();
7005
// The division itself must carry the 'arcp' fast-math flag.
7006 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
7007 return false;
7008
// True when the register is a constant (or constant splat) equal to exactly
// 1.0 or -1.0.
7009 auto IsOne = [this](Register X) {
7010 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
7011 return N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0));
7012 };
7013
7014 // Skip if current node is a reciprocal/fneg-reciprocal.
7015 if (IsOne(X))
7016 return false;
7017
7018 // Exit early if the target does not want this transform or if there can't
7019 // possibly be enough uses of the divisor to make the transform worthwhile.
7020 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
7021 if (!MinUses)
7022 return false;
7023
7024 // Find all FDIV users of the same divisor. For the moment we limit all
7025 // instructions to a single BB and use the first Instr in MatchInfo as the
7026 // dominating position.
7027 MatchInfo.push_back(&MI);
7028 for (auto &U : MRI.use_nodbg_instructions(Y)) {
7029 if (&U == &MI || U.getParent() != MI.getParent())
7030 continue;
// Y must be the divisor and must not also be the dividend (Y / Y), and the
// dividend must not be +/-1.0.
7031 if (U.getOpcode() == TargetOpcode::G_FDIV &&
7032 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y &&
7033 !IsOne(U.getOperand(1).getReg())) {
7034 // This division is eligible for optimization only if it carries the
7035 // 'arcp' fast-math flag, i.e. it allows reciprocal formation.
7036 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
7037 MatchInfo.push_back(&U);
// Keep the instruction that dominates the current front at index 0.
7038 if (dominates(U, *MatchInfo[0]))
7039 std::swap(MatchInfo[0], MatchInfo.back());
7040 }
7041 }
7042 }
7043
7044 // Now that we have the actual number of divisor uses, make sure it meets
7045 // the minimum threshold specified by the target.
7046 return MatchInfo.size() >= MinUses;
7047}
7048
7050 SmallVector<MachineInstr *> &MatchInfo) const {
// Rewrite every G_FDIV in MatchInfo as a G_FMUL by a shared reciprocal.
// MatchInfo[0] supplies the divisor, the result type, the insertion point and
// the flags for the reciprocal. All instructions in MatchInfo are erased.
7051 // Generate the new div at the position of the first instruction, that we have
7052 // ensured will dominate all other instructions.
7053 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
7054 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
7055 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
7056 MatchInfo[0]->getOperand(2).getReg(),
7057 MatchInfo[0]->getFlags());
7058
7059 // Replace all found div's with fmul instructions.
7060 for (MachineInstr *MI : MatchInfo) {
// Each multiply is inserted at, and inherits the flags of, the division it
// replaces, and reuses that division's destination register.
7061 Builder.setInsertPt(*MI->getParent(), MI);
7062 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
7063 Div->getOperand(0).getReg(), MI->getFlags());
7064 MI->eraseFromParent();
7065 }
7066}
7067
// Match a G_ADD in which one operand is a G_SUB whose subtrahend is the other
// G_ADD operand. Src is bound to the G_SUB's first operand (the value the add
// simplifies to); it is presumably a reference parameter declared in the part
// of the signature not visible here.
7069 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
7070 Register LHS = MI.getOperand(1).getReg();
7071 Register RHS = MI.getOperand(2).getReg();
7072
7073 // Helper lambda to check for opportunities for
7074 // A + (B - A) -> B
7075 // (B - A) + A -> B
7076 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
7077 Register Reg;
// The matcher binds Src even if the register comparison afterwards fails.
7078 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
7079 Reg == MaybeSameReg;
7080 };
// Try the G_SUB on the left first, then on the right.
7081 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
7082}
7083
7085 Register &MatchInfo) const {
// On success MatchInfo is set to the register x described below and true is
// returned; the type check against the destination vector type is the final
// condition in each case.
7086 // This combine folds the following patterns:
7087 //
7088 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
7089 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
7090 // into
7091 // x
7092 // if
7093 // k == sizeof(VecEltTy)/2
7094 // type(x) == type(dst)
7095 //
7096 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
7097 // into
7098 // x
7099 // if
7100 // type(x) == type(dst)
7101
7102 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
7103 LLT DstEltTy = DstVecTy.getElementType();
7104
7105 Register Lo, Hi;
7106
7107 if (mi_match(
7108 MI, MRI,
7110 MatchInfo = Lo;
7111 return MRI.getType(MatchInfo) == DstVecTy;
7112 }
7113
7114 std::optional<ValueAndVReg> ShiftAmount;
7115 const auto LoPattern = m_GBitcast(m_Reg(Lo));
7116 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
7117 if (mi_match(
7118 MI, MRI,
7119 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
7120 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
// Both halves must come from the same source register, and the shift amount
// must equal the destination element size in bits.
7121 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
7122 MatchInfo = Lo;
7123 return MRI.getType(MatchInfo) == DstVecTy;
7124 }
7125 }
7126
7127 return false;
7128}
7129
7131 Register &MatchInfo) const {
// MatchInfo is bound to the first G_BUILD_VECTOR source by the matcher; the
// second source is matched but ignored. Returns true only if the pattern
// matches and the types agree.
7132 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
7133 // if type(x) == type(G_TRUNC)
7134 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7135 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
7136 return false;
7137
7138 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
7139}
7140
7142 Register &MatchInfo) const {
// Returns true when the pattern below matches, the constant shift amount
// equals the bit size of MatchInfo's type, and that type equals the type of
// MI's result.
7143 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
7144 // y if K == size of vector element type
7145 std::optional<ValueAndVReg> ShiftAmt;
7146 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7148 m_GCst(ShiftAmt))))
7149 return false;
7150
7151 LLT MatchTy = MRI.getType(MatchInfo);
7152 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
7153 MatchTy == MRI.getType(MI.getOperand(0).getReg());
7154}
7155
7156unsigned CombinerHelper::getFPMinMaxOpcForSelect(
7157 CmpInst::Predicate Pred, LLT DstTy,
7158 SelectPatternNaNBehaviour VsNaNRetVal) const {
7159 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
7160 "Expected a NaN behaviour?");
7161 // Choose an opcode based off of legality or the behaviour when one of the
7162 // LHS/RHS may be NaN.
7163 switch (Pred) {
7164 default:
7165 return 0;
7166 case CmpInst::FCMP_UGT:
7167 case CmpInst::FCMP_UGE:
7168 case CmpInst::FCMP_OGT:
7169 case CmpInst::FCMP_OGE:
7170 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7171 return TargetOpcode::G_FMAXNUM;
7172 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7173 return TargetOpcode::G_FMAXIMUM;
7174 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
7175 return TargetOpcode::G_FMAXNUM;
7176 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
7177 return TargetOpcode::G_FMAXIMUM;
7178 return 0;
7179 case CmpInst::FCMP_ULT:
7180 case CmpInst::FCMP_ULE:
7181 case CmpInst::FCMP_OLT:
7182 case CmpInst::FCMP_OLE:
7183 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7184 return TargetOpcode::G_FMINNUM;
7185 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7186 return TargetOpcode::G_FMINIMUM;
7187 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
7188 return TargetOpcode::G_FMINNUM;
7189 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
7190 return 0;
7191 return TargetOpcode::G_FMINIMUM;
7192 }
7193}
7194
7195CombinerHelper::SelectPatternNaNBehaviour
7196CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
7197 bool IsOrderedComparison) const {
7198 bool LHSSafe = VT->isKnownNeverNaN(LHS);
7199 bool RHSSafe = VT->isKnownNeverNaN(RHS);
7200 // Completely unsafe.
7201 if (!LHSSafe && !RHSSafe)
7202 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
7203 if (LHSSafe && RHSSafe)
7204 return SelectPatternNaNBehaviour::RETURNS_ANY;
7205 // An ordered comparison will return false when given a NaN, so it
7206 // returns the RHS.
7207 if (IsOrderedComparison)
7208 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
7209 : SelectPatternNaNBehaviour::RETURNS_OTHER;
7210 // An unordered comparison will return true when given a NaN, so it
7211 // returns the LHS.
7212 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
7213 : SelectPatternNaNBehaviour::RETURNS_NAN;
7214}
7215
// Try to turn a select whose condition Cond is a floating-point compare of the
// two selected values into a single FP min/max instruction writing Dst.
// On success MatchInfo is set to a callback that builds that instruction and
// true is returned; otherwise false is returned.
7216bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
7217 Register TrueVal, Register FalseVal,
7218 BuildFnTy &MatchInfo) const {
7219 // Match: select (fcmp cond x, y) x, y
7220 // select (fcmp cond x, y) y, x
7221 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
7222 LLT DstTy = MRI.getType(Dst);
7223 // Bail out early on pointers, since we'll never want to fold to a min/max.
7224 if (DstTy.isPointer())
7225 return false;
7226 // Match a floating point compare with a less-than/greater-than predicate.
7227 // TODO: Allow multiple users of the compare if they are all selects.
7228 CmpInst::Predicate Pred;
7229 Register CmpLHS, CmpRHS;
7230 if (!mi_match(Cond, MRI,
7232 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
7233 CmpInst::isEquality(Pred))
7234 return false;
// Work out what the select yields if one compared value is NaN; give up when
// neither operand is known to be NaN-free.
7235 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
7236 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
7237 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
7238 return false;
// If the select arms are in the opposite order to the compare operands,
// canonicalise by swapping the compare operands and predicate, which also
// flips the NaN behaviour.
7239 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
7240 std::swap(CmpLHS, CmpRHS);
7241 Pred = CmpInst::getSwappedPredicate(Pred);
7242 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
7243 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
7244 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
7245 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
7246 }
// After canonicalisation the select arms must be exactly the compare operands.
7247 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
7248 return false;
7249 // Decide what type of max/min this should be based off of the predicate.
7250 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
7251 if (!Opc || !isLegal({Opc, {DstTy}}))
7252 return false;
7253 // Comparisons between signed zero and zero may have different results...
7254 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
7255 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
7256 // We don't know if a comparison between two 0s will give us a consistent
7257 // result. Be conservative and only proceed if at least one side is
7258 // non-zero.
// "Known non-zero" here means: a floating-point constant whose value is
// non-zero. The LHS is tried first, then the RHS.
7259 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
7260 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
7261 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
7262 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
7263 return false;
7264 }
7265 }
7266 MatchInfo = [=](MachineIRBuilder &B) {
7267 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
7268 };
7269 return true;
7270}
7271
7273 BuildFnTy &MatchInfo) const {
// Unpack a G_SELECT (looking through a single-use G_TRUNC on the condition)
// and delegate to matchFPSelectToMinMax, whose result is returned unchanged.
7274 // TODO: Handle integer cases.
7275 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
7276 // Condition may be fed by a truncated compare.
7277 Register Cond = MI.getOperand(1).getReg();
7278 Register MaybeTrunc;
7279 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
7280 Cond = MaybeTrunc;
// G_SELECT operands: 0 = result, 2 = value if true, 3 = value if false.
7281 Register Dst = MI.getOperand(0).getReg();
7282 Register TrueVal = MI.getOperand(2).getReg();
7283 Register FalseVal = MI.getOperand(3).getReg();
7284 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
7285}
7286
7288 BuildFnTy &MatchInfo) const {
// Match an equality G_ICMP between X and an add/sub/xor that has X as an
// operand, and set MatchInfo to a callback that compares the other operand
// against zero instead.
7289 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
7290 // (X + Y) == X --> Y == 0
7291 // (X + Y) != X --> Y != 0
7292 // (X - Y) == X --> Y == 0
7293 // (X - Y) != X --> Y != 0
7294 // (X ^ Y) == X --> Y == 0
7295 // (X ^ Y) != X --> Y != 0
7296 Register Dst = MI.getOperand(0).getReg();
7297 CmpInst::Predicate Pred;
7298 Register X, Y, OpLHS, OpRHS;
// G_SUB is not commutative, so it is matched separately: X must be the
// minuend and Y is bound directly to the subtrahend.
7299 bool MatchedSub = mi_match(
7300 Dst, MRI,
7301 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
7302 if (MatchedSub && X != OpLHS)
7303 return false;
7304 if (!MatchedSub) {
7305 if (!mi_match(Dst, MRI,
7306 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7307 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7308 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7309 return false;
// For add/xor, Y is whichever operand is not X; an invalid (default) register
// if X is neither operand.
7310 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7311 }
// NOTE: MatchInfo is assigned before the final validity check below, so it is
// only meaningful when this function returns true.
7312 MatchInfo = [=](MachineIRBuilder &B) {
7313 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7314 B.buildICmp(Pred, Dst, Y, Zero);
7315 };
7316 return CmpInst::isEquality(Pred) && Y.isValid();
7317}
7318
7319/// Return the minimum useless shift amount that results in complete loss of the
7320/// source value. Return std::nullopt when it cannot determine a value.
7321static std::optional<unsigned>
7322getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7323 std::optional<int64_t> &Result) {
7324 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7325 Opcode == TargetOpcode::G_ASHR) &&
7326 "Expect G_SHL, G_LSHR or G_ASHR.");
7327 auto SignificantBits = 0;
7328 switch (Opcode) {
7329 case TargetOpcode::G_SHL:
7330 SignificantBits = ValueKB.countMinTrailingZeros();
7331 Result = 0;
7332 break;
7333 case TargetOpcode::G_LSHR:
7334 Result = 0;
7335 SignificantBits = ValueKB.countMinLeadingZeros();
7336 break;
7337 case TargetOpcode::G_ASHR:
7338 if (ValueKB.isNonNegative()) {
7339 SignificantBits = ValueKB.countMinLeadingZeros();
7340 Result = 0;
7341 } else if (ValueKB.isNegative()) {
7342 SignificantBits = ValueKB.countMinLeadingOnes();
7343 Result = -1;
7344 } else {
7345 // Cannot determine shift result.
7346 Result = std::nullopt;
7347 }
7348 break;
7349 default:
7350 break;
7351 }
7352 return ValueKB.getBitWidth() - SignificantBits;
7353}
7354
7356 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
// Match a shift whose constant shift amount is large enough that the result
// no longer depends on the shifted value. MatchInfo is set to std::nullopt
// when the amount is at least the scalar bit width, otherwise to whatever
// getMinUselessShift stores in it.
7357 Register ShiftVal = MI.getOperand(1).getReg();
7358 Register ShiftReg = MI.getOperand(2).getReg();
7359 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
// Predicate handed to matchUnaryPredicate for the shift-amount register.
7360 auto IsShiftTooBig = [&](const Constant *C) {
7361 auto *CI = dyn_cast<ConstantInt>(C);
7362 if (!CI)
7363 return false;
// Shift amount >= scalar bit width.
7364 if (CI->uge(ResTy.getScalarSizeInBits())) {
7365 MatchInfo = std::nullopt;
7366 return true;
7367 }
// Otherwise use the known bits of the shifted value to find the smallest
// amount that already discards every bit that could differ.
7368 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7369 MI.getOpcode(), MatchInfo);
7370 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7371 };
7372 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7373}
7374
// Returns true when the instruction's left source operand is an integer
// constant or a G_CONSTANT_FOLD_BARRIER and its right source operand is
// neither, i.e. when swapping the two would move the constant to the right.
7376 unsigned LHSOpndIdx = 1;
7377 unsigned RHSOpndIdx = 2;
// For these four overflow opcodes the source operands are at indices 2 and 3
// rather than 1 and 2.
7378 switch (MI.getOpcode()) {
7379 case TargetOpcode::G_UADDO:
7380 case TargetOpcode::G_SADDO:
7381 case TargetOpcode::G_UMULO:
7382 case TargetOpcode::G_SMULO:
7383 LHSOpndIdx = 2;
7384 RHSOpndIdx = 3;
7385 break;
7386 default:
7387 break;
7388 }
7389 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7390 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7391 if (!getIConstantVRegVal(LHS, MRI)) {
7392 // Skip commuting if LHS is not a constant. But, LHS may be a
7393 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7394 // have a constant on the RHS.
7395 if (MRI.getVRegDef(LHS)->getOpcode() !=
7396 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7397 return false;
7398 }
7399 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7400 return MRI.getVRegDef(RHS)->getOpcode() !=
7401 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7402 !getIConstantVRegVal(RHS, MRI);
7403}
7404
// Returns true when operand 1 is a floating-point constant or constant splat
// and operand 2 is not.
7406 Register LHS = MI.getOperand(1).getReg();
7407 Register RHS = MI.getOperand(2).getReg();
// Only whether each operand matches is used; the bound value is not read.
7408 std::optional<FPValueAndVReg> ValAndVReg;
7409 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7410 return false;
7411 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7412}
7413
// Swap the two source operands of MI in place, notifying the observer before
// and after the change.
7415 Observer.changingInstr(MI);
7416 unsigned LHSOpndIdx = 1;
7417 unsigned RHSOpndIdx = 2;
// For these four overflow opcodes the source operands are at indices 2 and 3
// rather than 1 and 2.
7418 switch (MI.getOpcode()) {
7419 case TargetOpcode::G_UADDO:
7420 case TargetOpcode::G_SADDO:
7421 case TargetOpcode::G_UMULO:
7422 case TargetOpcode::G_SMULO:
7423 LHSOpndIdx = 2;
7424 RHSOpndIdx = 3;
7425 break;
7426 default:
7427 break;
7428 }
// Read both registers first, then write them back crosswise.
7429 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7430 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7431 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7432 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7433 Observer.changedInstr(MI);
7434}
7435
7436bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7437 LLT SrcTy = MRI.getType(Src);
7438 if (SrcTy.isFixedVector())
7439 return isConstantSplatVector(Src, 1, AllowUndefs);
7440 if (SrcTy.isScalar()) {
7441 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7442 return true;
7443 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7444 return IConstant && IConstant->Value == 1;
7445 }
7446 return false; // scalable vector
7447}
7448
7449bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7450 LLT SrcTy = MRI.getType(Src);
7451 if (SrcTy.isFixedVector())
7452 return isConstantSplatVector(Src, 0, AllowUndefs);
7453 if (SrcTy.isScalar()) {
7454 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7455 return true;
7456 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7457 return IConstant && IConstant->Value == 0;
7458 }
7459 return false; // scalable vector
7460}
7461
7462// Ignores COPYs during conformance checks.
7463// FIXME scalable vectors.
7464bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7465 bool AllowUndefs) const {
7466 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7467 if (!BuildVector)
7468 return false;
7469 unsigned NumSources = BuildVector->getNumSources();
7470
7471 for (unsigned I = 0; I < NumSources; ++I) {
7472 GImplicitDef *ImplicitDef =
7474 if (ImplicitDef && AllowUndefs)
7475 continue;
7476 if (ImplicitDef && !AllowUndefs)
7477 return false;
7478 std::optional<ValueAndVReg> IConstant =
7480 if (IConstant && IConstant->Value == SplatValue)
7481 continue;
7482 return false;
7483 }
7484 return true;
7485}
7486
7487// Ignores COPYs during lookups.
7488// FIXME scalable vectors
7489std::optional<APInt>
7490CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7491 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7492 if (IConstant)
7493 return IConstant->Value;
7494
7495 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7496 if (!BuildVector)
7497 return std::nullopt;
7498 unsigned NumSources = BuildVector->getNumSources();
7499
7500 std::optional<APInt> Value = std::nullopt;
7501 for (unsigned I = 0; I < NumSources; ++I) {
7502 std::optional<ValueAndVReg> IConstant =
7504 if (!IConstant)
7505 return std::nullopt;
7506 if (!Value)
7507 Value = IConstant->Value;
7508 else if (*Value != IConstant->Value)
7509 return std::nullopt;
7510 }
7511 return Value;
7512}
7513
7514// FIXME G_SPLAT_VECTOR
7515bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7516 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7517 if (IConstant)
7518 return true;
7519
7520 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7521 if (!BuildVector)
7522 return false;
7523
7524 unsigned NumSources = BuildVector->getNumSources();
7525 for (unsigned I = 0; I < NumSources; ++I) {
7526 std::optional<ValueAndVReg> IConstant =
7528 if (!IConstant)
7529 return false;
7530 }
7531 return true;
7532}
7533
7534// TODO: use knownbits to determine zeros
7535bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7536 BuildFnTy &MatchInfo) const {
7537 uint32_t Flags = Select->getFlags();
7538 Register Dest = Select->getReg(0);
7539 Register Cond = Select->getCondReg();
7540 Register True = Select->getTrueReg();
7541 Register False = Select->getFalseReg();
7542 LLT CondTy = MRI.getType(Select->getCondReg());
7543 LLT TrueTy = MRI.getType(Select->getTrueReg());
7544
7545 // We only do this combine for scalar boolean conditions.
7546 if (CondTy != LLT::scalar(1))
7547 return false;
7548
7549 if (TrueTy.isPointer())
7550 return false;
7551
7552 // Both are scalars.
7553 std::optional<ValueAndVReg> TrueOpt =
7555 std::optional<ValueAndVReg> FalseOpt =
7557
7558 if (!TrueOpt || !FalseOpt)
7559 return false;
7560
7561 APInt TrueValue = TrueOpt->Value;
7562 APInt FalseValue = FalseOpt->Value;
7563
7564 // select Cond, 1, 0 --> zext (Cond)
7565 if (TrueValue.isOne() && FalseValue.isZero()) {
7566 MatchInfo = [=](MachineIRBuilder &B) {
7567 B.setInstrAndDebugLoc(*Select);
7568 B.buildZExtOrTrunc(Dest, Cond);
7569 };
7570 return true;
7571 }
7572
7573 // select Cond, -1, 0 --> sext (Cond)
7574 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7575 MatchInfo = [=](MachineIRBuilder &B) {
7576 B.setInstrAndDebugLoc(*Select);
7577 B.buildSExtOrTrunc(Dest, Cond);
7578 };
7579 return true;
7580 }
7581
7582 // select Cond, 0, 1 --> zext (!Cond)
7583 if (TrueValue.isZero() && FalseValue.isOne()) {
7584 MatchInfo = [=](MachineIRBuilder &B) {
7585 B.setInstrAndDebugLoc(*Select);
7586 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7587 B.buildNot(Inner, Cond);
7588 B.buildZExtOrTrunc(Dest, Inner);
7589 };
7590 return true;
7591 }
7592
7593 // select Cond, 0, -1 --> sext (!Cond)
7594 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7595 MatchInfo = [=](MachineIRBuilder &B) {
7596 B.setInstrAndDebugLoc(*Select);
7597 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7598 B.buildNot(Inner, Cond);
7599 B.buildSExtOrTrunc(Dest, Inner);
7600 };
7601 return true;
7602 }
7603
7604 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7605 if (TrueValue - 1 == FalseValue) {
7606 MatchInfo = [=](MachineIRBuilder &B) {
7607 B.setInstrAndDebugLoc(*Select);
7608 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7609 B.buildZExtOrTrunc(Inner, Cond);
7610 B.buildAdd(Dest, Inner, False);
7611 };
7612 return true;
7613 }
7614
7615 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7616 if (TrueValue + 1 == FalseValue) {
7617 MatchInfo = [=](MachineIRBuilder &B) {
7618 B.setInstrAndDebugLoc(*Select);
7619 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7620 B.buildSExtOrTrunc(Inner, Cond);
7621 B.buildAdd(Dest, Inner, False);
7622 };
7623 return true;
7624 }
7625
7626 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7627 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7628 MatchInfo = [=](MachineIRBuilder &B) {
7629 B.setInstrAndDebugLoc(*Select);
7630 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7631 B.buildZExtOrTrunc(Inner, Cond);
7632 // The shift amount must be scalar.
7633 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7634 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
7635 B.buildShl(Dest, Inner, ShAmtC, Flags);
7636 };
7637 return true;
7638 }
7639
7640 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7641 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7642 MatchInfo = [=](MachineIRBuilder &B) {
7643 B.setInstrAndDebugLoc(*Select);
7644 Register Not = MRI.createGenericVirtualRegister(CondTy);
7645 B.buildNot(Not, Cond);
7646 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7647 B.buildZExtOrTrunc(Inner, Not);
7648 // The shift amount must be scalar.
7649 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7650 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7651 B.buildShl(Dest, Inner, ShAmtC, Flags);
7652 };
7653 return true;
7654 }
7655
7656 // select Cond, -1, C --> or (sext Cond), C
7657 if (TrueValue.isAllOnes()) {
7658 MatchInfo = [=](MachineIRBuilder &B) {
7659 B.setInstrAndDebugLoc(*Select);
7660 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7661 B.buildSExtOrTrunc(Inner, Cond);
7662 B.buildOr(Dest, Inner, False, Flags);
7663 };
7664 return true;
7665 }
7666
7667 // select Cond, C, -1 --> or (sext (not Cond)), C
7668 if (FalseValue.isAllOnes()) {
7669 MatchInfo = [=](MachineIRBuilder &B) {
7670 B.setInstrAndDebugLoc(*Select);
7671 Register Not = MRI.createGenericVirtualRegister(CondTy);
7672 B.buildNot(Not, Cond);
7673 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7674 B.buildSExtOrTrunc(Inner, Not);
7675 B.buildOr(Dest, Inner, True, Flags);
7676 };
7677 return true;
7678 }
7679
7680 return false;
7681}
7682
7683// TODO: use knownbits to determine zeros
7684bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7685 BuildFnTy &MatchInfo) const {
7686 uint32_t Flags = Select->getFlags();
7687 Register DstReg = Select->getReg(0);
7688 Register Cond = Select->getCondReg();
7689 Register True = Select->getTrueReg();
7690 Register False = Select->getFalseReg();
7691 LLT CondTy = MRI.getType(Select->getCondReg());
7692 LLT TrueTy = MRI.getType(Select->getTrueReg());
7693
7694 // Boolean or fixed vector of booleans.
7695 if (CondTy.isScalableVector() ||
7696 (CondTy.isFixedVector() &&
7697 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7698 CondTy.getScalarSizeInBits() != 1)
7699 return false;
7700
7701 if (CondTy != TrueTy)
7702 return false;
7703
7704 // select Cond, Cond, F --> or Cond, F
7705 // select Cond, 1, F --> or Cond, F
7706 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7707 MatchInfo = [=](MachineIRBuilder &B) {
7708 B.setInstrAndDebugLoc(*Select);
7709 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7710 B.buildZExtOrTrunc(Ext, Cond);
7711 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7712 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7713 };
7714 return true;
7715 }
7716
7717 // select Cond, T, Cond --> and Cond, T
7718 // select Cond, T, 0 --> and Cond, T
7719 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7720 MatchInfo = [=](MachineIRBuilder &B) {
7721 B.setInstrAndDebugLoc(*Select);
7722 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7723 B.buildZExtOrTrunc(Ext, Cond);
7724 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7725 B.buildAnd(DstReg, Ext, FreezeTrue);
7726 };
7727 return true;
7728 }
7729
7730 // select Cond, T, 1 --> or (not Cond), T
7731 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7732 MatchInfo = [=](MachineIRBuilder &B) {
7733 B.setInstrAndDebugLoc(*Select);
7734 // First the not.
7735 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7736 B.buildNot(Inner, Cond);
7737 // Then an ext to match the destination register.
7738 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7739 B.buildZExtOrTrunc(Ext, Inner);
7740 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7741 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7742 };
7743 return true;
7744 }
7745
7746 // select Cond, 0, F --> and (not Cond), F
7747 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7748 MatchInfo = [=](MachineIRBuilder &B) {
7749 B.setInstrAndDebugLoc(*Select);
7750 // First the not.
7751 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7752 B.buildNot(Inner, Cond);
7753 // Then an ext to match the destination register.
7754 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7755 B.buildZExtOrTrunc(Ext, Inner);
7756 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7757 B.buildAnd(DstReg, Ext, FreezeFalse);
7758 };
7759 return true;
7760 }
7761
7762 return false;
7763}
7764
7766 BuildFnTy &MatchInfo) const {
7767 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7768 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7769
7770 Register DstReg = Select->getReg(0);
7771 Register True = Select->getTrueReg();
7772 Register False = Select->getFalseReg();
7773 LLT DstTy = MRI.getType(DstReg);
7774
7775 if (DstTy.isPointerOrPointerVector())
7776 return false;
7777
7778 // We want to fold the icmp and replace the select.
7779 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7780 return false;
7781
7782 CmpInst::Predicate Pred = Cmp->getCond();
7783 // We need a larger or smaller predicate for
7784 // canonicalization.
7785 if (CmpInst::isEquality(Pred))
7786 return false;
7787
7788 Register CmpLHS = Cmp->getLHSReg();
7789 Register CmpRHS = Cmp->getRHSReg();
7790
7791 // We can swap CmpLHS and CmpRHS for higher hitrate.
7792 if (True == CmpRHS && False == CmpLHS) {
7793 std::swap(CmpLHS, CmpRHS);
7794 Pred = CmpInst::getSwappedPredicate(Pred);
7795 }
7796
7797 // (icmp X, Y) ? X : Y -> integer minmax.
7798 // see matchSelectPattern in ValueTracking.
7799 // Legality between G_SELECT and integer minmax can differ.
7800 if (True != CmpLHS || False != CmpRHS)
7801 return false;
7802
7803 switch (Pred) {
7804 case ICmpInst::ICMP_UGT:
7805 case ICmpInst::ICMP_UGE: {
7806 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7807 return false;
7808 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7809 return true;
7810 }
7811 case ICmpInst::ICMP_SGT:
7812 case ICmpInst::ICMP_SGE: {
7813 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7814 return false;
7815 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7816 return true;
7817 }
7818 case ICmpInst::ICMP_ULT:
7819 case ICmpInst::ICMP_ULE: {
7820 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7821 return false;
7822 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7823 return true;
7824 }
7825 case ICmpInst::ICMP_SLT:
7826 case ICmpInst::ICMP_SLE: {
7827 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7828 return false;
7829 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7830 return true;
7831 }
7832 default:
7833 return false;
7834 }
7835}
7836
7837// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
7839 BuildFnTy &MatchInfo) const {
7840 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7841 Register DestReg = MI.getOperand(0).getReg();
7842 LLT DestTy = MRI.getType(DestReg);
7843
7844 Register X;
7845 Register Sub0;
7846 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7847 if (mi_match(DestReg, MRI,
7848 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7849 m_GSMax(m_Reg(X), NegPattern),
7850 m_GUMin(m_Reg(X), NegPattern),
7851 m_GUMax(m_Reg(X), NegPattern)))))) {
7852 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7853 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7854 if (isLegal({NewOpc, {DestTy}})) {
7855 MatchInfo = [=](MachineIRBuilder &B) {
7856 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7857 };
7858 return true;
7859 }
7860 }
7861
7862 return false;
7863}
7864
7867
7868 if (tryFoldSelectOfConstants(Select, MatchInfo))
7869 return true;
7870
7871 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7872 return true;
7873
7874 return false;
7875}
7876
7877/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7878/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7879/// into a single comparison using range-based reasoning.
7880/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
7881bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7882 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7883 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7884 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7885 Register DstReg = Logic->getReg(0);
7886 Register LHS = Logic->getLHSReg();
7887 Register RHS = Logic->getRHSReg();
7888 unsigned Flags = Logic->getFlags();
7889
7890 // We need an G_ICMP on the LHS register.
7891 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7892 if (!Cmp1)
7893 return false;
7894
7895 // We need an G_ICMP on the RHS register.
7896 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7897 if (!Cmp2)
7898 return false;
7899
7900 // We want to fold the icmps.
7901 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7902 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7903 return false;
7904
7905 APInt C1;
7906 APInt C2;
7907 std::optional<ValueAndVReg> MaybeC1 =
7909 if (!MaybeC1)
7910 return false;
7911 C1 = MaybeC1->Value;
7912
7913 std::optional<ValueAndVReg> MaybeC2 =
7915 if (!MaybeC2)
7916 return false;
7917 C2 = MaybeC2->Value;
7918
7919 Register R1 = Cmp1->getLHSReg();
7920 Register R2 = Cmp2->getLHSReg();
7921 CmpInst::Predicate Pred1 = Cmp1->getCond();
7922 CmpInst::Predicate Pred2 = Cmp2->getCond();
7923 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7924 LLT CmpOperandTy = MRI.getType(R1);
7925
7926 if (CmpOperandTy.isPointer())
7927 return false;
7928
7929 // We build ands, adds, and constants of type CmpOperandTy.
7930 // They must be legal to build.
7931 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7932 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7933 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7934 return false;
7935
7936 // Look through add of a constant offset on R1, R2, or both operands. This
7937 // allows us to interpret the R + C' < C'' range idiom into a proper range.
7938 std::optional<APInt> Offset1;
7939 std::optional<APInt> Offset2;
7940 if (R1 != R2) {
7941 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7942 std::optional<ValueAndVReg> MaybeOffset1 =
7944 if (MaybeOffset1) {
7945 R1 = Add->getLHSReg();
7946 Offset1 = MaybeOffset1->Value;
7947 }
7948 }
7949 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7950 std::optional<ValueAndVReg> MaybeOffset2 =
7952 if (MaybeOffset2) {
7953 R2 = Add->getLHSReg();
7954 Offset2 = MaybeOffset2->Value;
7955 }
7956 }
7957 }
7958
7959 if (R1 != R2)
7960 return false;
7961
7962 // We calculate the icmp ranges including maybe offsets.
7963 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7964 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7965 if (Offset1)
7966 CR1 = CR1.subtract(*Offset1);
7967
7968 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7969 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7970 if (Offset2)
7971 CR2 = CR2.subtract(*Offset2);
7972
7973 bool CreateMask = false;
7974 APInt LowerDiff;
7975 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7976 if (!CR) {
7977 // We need non-wrapping ranges.
7978 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7979 return false;
7980
7981 // Check whether we have equal-size ranges that only differ by one bit.
7982 // In that case we can apply a mask to map one range onto the other.
7983 LowerDiff = CR1.getLower() ^ CR2.getLower();
7984 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7985 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7986 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7987 CR1Size != CR2.getUpper() - CR2.getLower())
7988 return false;
7989
7990 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7991 CreateMask = true;
7992 }
7993
7994 if (IsAnd)
7995 CR = CR->inverse();
7996
7997 CmpInst::Predicate NewPred;
7998 APInt NewC, Offset;
7999 CR->getEquivalentICmp(NewPred, NewC, Offset);
8000
8001 // We take the result type of one of the original icmps, CmpTy, for
8002 // the to be build icmp. The operand type, CmpOperandTy, is used for
8003 // the other instructions and constants to be build. The types of
8004 // the parameters and output are the same for add and and. CmpTy
8005 // and the type of DstReg might differ. That is why we zext or trunc
8006 // the icmp into the destination register.
8007
8008 MatchInfo = [=](MachineIRBuilder &B) {
8009 if (CreateMask && Offset != 0) {
8010 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
8011 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
8012 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
8013 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
8014 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8015 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
8016 B.buildZExtOrTrunc(DstReg, ICmp);
8017 } else if (CreateMask && Offset == 0) {
8018 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
8019 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
8020 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8021 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
8022 B.buildZExtOrTrunc(DstReg, ICmp);
8023 } else if (!CreateMask && Offset != 0) {
8024 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
8025 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
8026 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8027 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
8028 B.buildZExtOrTrunc(DstReg, ICmp);
8029 } else if (!CreateMask && Offset == 0) {
8030 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8031 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
8032 B.buildZExtOrTrunc(DstReg, ICmp);
8033 } else {
8034 llvm_unreachable("unexpected configuration of CreateMask and Offset");
8035 }
8036 };
8037 return true;
8038}
8039
8040bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
8041 BuildFnTy &MatchInfo) const {
8042 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpecte xor");
8043 Register DestReg = Logic->getReg(0);
8044 Register LHS = Logic->getLHSReg();
8045 Register RHS = Logic->getRHSReg();
8046 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
8047
8048 // We need a compare on the LHS register.
8049 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
8050 if (!Cmp1)
8051 return false;
8052
8053 // We need a compare on the RHS register.
8054 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
8055 if (!Cmp2)
8056 return false;
8057
8058 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
8059 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
8060
8061 // We build one fcmp, want to fold the fcmps, replace the logic op,
8062 // and the fcmps must have the same shape.
8064 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
8065 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
8066 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
8067 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
8068 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
8069 return false;
8070
8071 CmpInst::Predicate PredL = Cmp1->getCond();
8072 CmpInst::Predicate PredR = Cmp2->getCond();
8073 Register LHS0 = Cmp1->getLHSReg();
8074 Register LHS1 = Cmp1->getRHSReg();
8075 Register RHS0 = Cmp2->getLHSReg();
8076 Register RHS1 = Cmp2->getRHSReg();
8077
8078 if (LHS0 == RHS1 && LHS1 == RHS0) {
8079 // Swap RHS operands to match LHS.
8080 PredR = CmpInst::getSwappedPredicate(PredR);
8081 std::swap(RHS0, RHS1);
8082 }
8083
8084 if (LHS0 == RHS0 && LHS1 == RHS1) {
8085 // We determine the new predicate.
8086 unsigned CmpCodeL = getFCmpCode(PredL);
8087 unsigned CmpCodeR = getFCmpCode(PredR);
8088 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
8089 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
8090 MatchInfo = [=](MachineIRBuilder &B) {
8091 // The fcmp predicates fill the lower part of the enum.
8092 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
8093 if (Pred == FCmpInst::FCMP_FALSE &&
8095 auto False = B.buildConstant(CmpTy, 0);
8096 B.buildZExtOrTrunc(DestReg, False);
8097 } else if (Pred == FCmpInst::FCMP_TRUE &&
8099 auto True =
8100 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
8101 CmpTy.isVector() /*isVector*/,
8102 true /*isFP*/));
8103 B.buildZExtOrTrunc(DestReg, True);
8104 } else { // We take the predicate without predicate optimizations.
8105 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
8106 B.buildZExtOrTrunc(DestReg, Cmp);
8107 }
8108 };
8109 return true;
8110 }
8111
8112 return false;
8113}
8114
8116 GAnd *And = cast<GAnd>(&MI);
8117
8118 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
8119 return true;
8120
8121 if (tryFoldLogicOfFCmps(And, MatchInfo))
8122 return true;
8123
8124 return false;
8125}
8126
8128 GOr *Or = cast<GOr>(&MI);
8129
8130 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
8131 return true;
8132
8133 if (tryFoldLogicOfFCmps(Or, MatchInfo))
8134 return true;
8135
8136 return false;
8137}
8138
8140 BuildFnTy &MatchInfo) const {
8142
8143 // Addo has no flags
8144 Register Dst = Add->getReg(0);
8145 Register Carry = Add->getReg(1);
8146 Register LHS = Add->getLHSReg();
8147 Register RHS = Add->getRHSReg();
8148 bool IsSigned = Add->isSigned();
8149 LLT DstTy = MRI.getType(Dst);
8150 LLT CarryTy = MRI.getType(Carry);
8151
8152 // Fold addo, if the carry is dead -> add, undef.
8153 if (MRI.use_nodbg_empty(Carry) &&
8154 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
8155 MatchInfo = [=](MachineIRBuilder &B) {
8156 B.buildAdd(Dst, LHS, RHS);
8157 B.buildUndef(Carry);
8158 };
8159 return true;
8160 }
8161
8162 // Canonicalize constant to RHS.
8163 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
8164 if (IsSigned) {
8165 MatchInfo = [=](MachineIRBuilder &B) {
8166 B.buildSAddo(Dst, Carry, RHS, LHS);
8167 };
8168 return true;
8169 }
8170 // !IsSigned
8171 MatchInfo = [=](MachineIRBuilder &B) {
8172 B.buildUAddo(Dst, Carry, RHS, LHS);
8173 };
8174 return true;
8175 }
8176
8177 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
8178 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
8179
8180 // Fold addo(c1, c2) -> c3, carry.
8181 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
8183 bool Overflow;
8184 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
8185 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
8186 MatchInfo = [=](MachineIRBuilder &B) {
8187 B.buildConstant(Dst, Result);
8188 B.buildConstant(Carry, Overflow);
8189 };
8190 return true;
8191 }
8192
8193 // Fold (addo x, 0) -> x, no carry
8194 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
8195 MatchInfo = [=](MachineIRBuilder &B) {
8196 B.buildCopy(Dst, LHS);
8197 B.buildConstant(Carry, 0);
8198 };
8199 return true;
8200 }
8201
8202 // Given 2 constant operands whose sum does not overflow:
8203 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
8204 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
8205 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
8206 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
8207 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
8208 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
8209 std::optional<APInt> MaybeAddRHS =
8210 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
8211 if (MaybeAddRHS) {
8212 bool Overflow;
8213 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
8214 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
8215 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
8216 if (IsSigned) {
8217 MatchInfo = [=](MachineIRBuilder &B) {
8218 auto ConstRHS = B.buildConstant(DstTy, NewC);
8219 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8220 };
8221 return true;
8222 }
8223 // !IsSigned
8224 MatchInfo = [=](MachineIRBuilder &B) {
8225 auto ConstRHS = B.buildConstant(DstTy, NewC);
8226 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8227 };
8228 return true;
8229 }
8230 }
8231 };
8232
8233 // We try to combine addo to non-overflowing add.
8234 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
8236 return false;
8237
8238 // We try to combine uaddo to non-overflowing add.
8239 if (!IsSigned) {
8240 ConstantRange CRLHS =
8241 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
8242 ConstantRange CRRHS =
8243 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
8244
8245 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
8247 return false;
8249 MatchInfo = [=](MachineIRBuilder &B) {
8250 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8251 B.buildConstant(Carry, 0);
8252 };
8253 return true;
8254 }
8257 MatchInfo = [=](MachineIRBuilder &B) {
8258 B.buildAdd(Dst, LHS, RHS);
8259 B.buildConstant(Carry, 1);
8260 };
8261 return true;
8262 }
8263 }
8264 return false;
8265 }
8266
8267 // We try to combine saddo to non-overflowing add.
8268
8269 // If LHS and RHS each have at least two sign bits, then there is no signed
8270 // overflow.
8271 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
8272 MatchInfo = [=](MachineIRBuilder &B) {
8273 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8274 B.buildConstant(Carry, 0);
8275 };
8276 return true;
8277 }
8278
8279 ConstantRange CRLHS =
8280 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
8281 ConstantRange CRRHS =
8282 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
8283
8284 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
8286 return false;
8288 MatchInfo = [=](MachineIRBuilder &B) {
8289 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8290 B.buildConstant(Carry, 0);
8291 };
8292 return true;
8293 }
8296 MatchInfo = [=](MachineIRBuilder &B) {
8297 B.buildAdd(Dst, LHS, RHS);
8298 B.buildConstant(Carry, 1);
8299 };
8300 return true;
8301 }
8302 }
8303
8304 return false;
8305}
8306
8308 BuildFnTy &MatchInfo) const {
8310 MatchInfo(Builder);
8311 Root->eraseFromParent();
8312}
8313
8315 int64_t Exponent) const {
8316 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8318}
8319
8321 int64_t Exponent) const {
8322 auto [Dst, Base] = MI.getFirst2Regs();
8323 LLT Ty = MRI.getType(Dst);
8324 int64_t ExpVal = Exponent;
8325
8326 if (ExpVal == 0) {
8327 Builder.buildFConstant(Dst, 1.0);
8328 MI.removeFromParent();
8329 return;
8330 }
8331
8332 if (ExpVal < 0)
8333 ExpVal = -ExpVal;
8334
8335 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8336 // to generate the multiply sequence. There are more optimal ways to do this
8337 // (for example, powi(x,15) generates one more multiply than it should), but
8338 // this has the benefit of being both really simple and much better than a
8339 // libcall.
8340 std::optional<SrcOp> Res;
8341 SrcOp CurSquare = Base;
8342 while (ExpVal > 0) {
8343 if (ExpVal & 1) {
8344 if (!Res)
8345 Res = CurSquare;
8346 else
8347 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8348 }
8349
8350 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8351 ExpVal >>= 1;
8352 }
8353
8354 // If the original exponent was negative, invert the result, producing
8355 // 1/(x*x*x).
8356 if (Exponent < 0)
8357 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8358 MI.getFlags());
8359
8360 Builder.buildCopy(Dst, *Res);
8361 MI.eraseFromParent();
8362}
8363
8365 BuildFnTy &MatchInfo) const {
8366 // fold (A+C1)-C2 -> A+(C1-C2)
8367 const GSub *Sub = cast<GSub>(&MI);
8368 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8369
8370 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8371 return false;
8372
8373 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8374 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8375
8376 Register Dst = Sub->getReg(0);
8377 LLT DstTy = MRI.getType(Dst);
8378
8379 MatchInfo = [=](MachineIRBuilder &B) {
8380 auto Const = B.buildConstant(DstTy, C1 - C2);
8381 B.buildAdd(Dst, Add->getLHSReg(), Const);
8382 };
8383
8384 return true;
8385}
8386
8388 BuildFnTy &MatchInfo) const {
8389 // fold C2-(A+C1) -> (C2-C1)-A
8390 const GSub *Sub = cast<GSub>(&MI);
8391 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8392
8393 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8394 return false;
8395
8396 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8397 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8398
8399 Register Dst = Sub->getReg(0);
8400 LLT DstTy = MRI.getType(Dst);
8401
8402 MatchInfo = [=](MachineIRBuilder &B) {
8403 auto Const = B.buildConstant(DstTy, C2 - C1);
8404 B.buildSub(Dst, Const, Add->getLHSReg());
8405 };
8406
8407 return true;
8408}
8409
8411 BuildFnTy &MatchInfo) const {
8412 // fold (A-C1)-C2 -> A-(C1+C2)
     //
     // Sub1 is the outer G_SUB (MI); Sub2 is the G_SUB defining Sub1's LHS.
     // NOTE(review): the cast<> calls presumably rely on the invoking combine
     // rule having matched these opcodes already -- confirm.
8413 const GSub *Sub1 = cast<GSub>(&MI);
8414 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8415
     // Give up when the inner sub has any other non-debug user.
8416 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8417 return false;
8418
     // C2 comes from the outer sub's RHS, C1 from the inner sub's RHS.
     // NOTE(review): assumes both registers hold integer constants -- verify.
8419 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8420 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8421
8422 Register Dst = Sub1->getReg(0);
8423 LLT DstTy = MRI.getType(Dst);
8424
     // Deferred rewrite: materialize C1+C2 and define Dst as A - (C1+C2).
     // Sub2 is captured by copy as a pointer and dereferenced in the callback.
8425 MatchInfo = [=](MachineIRBuilder &B) {
8426 auto Const = B.buildConstant(DstTy, C1 + C2);
8427 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8428 };
8429
8430 return true;
8431}
8432
8434 BuildFnTy &MatchInfo) const {
8435 // fold (C1-A)-C2 -> (C1-C2)-A
     //
     // Sub1 is the outer G_SUB (MI); Sub2 is the G_SUB defining Sub1's LHS.
     // NOTE(review): the cast<> calls presumably rely on the invoking combine
     // rule having matched these opcodes already -- confirm.
8436 const GSub *Sub1 = cast<GSub>(&MI);
8437 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8438
     // Give up when the inner sub has any other non-debug user.
8439 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8440 return false;
8441
     // C2 comes from the outer sub's RHS, C1 from the inner sub's LHS.
     // NOTE(review): assumes both registers hold integer constants -- verify.
8442 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8443 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8444
8445 Register Dst = Sub1->getReg(0);
8446 LLT DstTy = MRI.getType(Dst);
8447
     // Deferred rewrite: materialize C1-C2 and define Dst as (C1-C2) - A,
     // where A is the inner sub's RHS.
8448 MatchInfo = [=](MachineIRBuilder &B) {
8449 auto Const = B.buildConstant(DstTy, C1 - C2);
8450 B.buildSub(Dst, Const, Sub2->getRHSReg());
8451 };
8452
8453 return true;
8454}
8455
8457 BuildFnTy &MatchInfo) const {
8458 // fold ((A-C1)+C2) -> (A+(C2-C1))
     //
     // MI is the outer G_ADD and its LHS must be produced by a G_SUB.
     // NOTE(review): the cast<> calls presumably rely on the invoking combine
     // rule having matched these opcodes already -- confirm.
8459 const GAdd *Add = cast<GAdd>(&MI);
8460 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8461
     // Give up when the inner sub has any other non-debug user.
8462 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8463 return false;
8464
     // C2 comes from the add's RHS, C1 from the sub's RHS.
     // NOTE(review): assumes both registers hold integer constants -- verify.
8465 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8466 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8467
8468 Register Dst = Add->getReg(0);
8469 LLT DstTy = MRI.getType(Dst);
8470
     // Deferred rewrite: materialize C2-C1 and define Dst as A + (C2-C1).
     // Sub is captured by copy as a pointer and dereferenced in the callback.
8471 MatchInfo = [=](MachineIRBuilder &B) {
8472 auto Const = B.buildConstant(DstTy, C2 - C1);
8473 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8474 };
8475
8476 return true;
8477}
8478
8480 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
     // Matches G_UNMERGE_VALUES(G_ANYEXT(G_BUILD_VECTOR ...)) and, on success,
     // stores in MatchInfo a callback that rebuilds every unmerge result as a
     // G_BUILD_VECTOR of per-element G_ANYEXTs. Returns false when any
     // precondition below fails.
8481 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8482
     // The unmerged value must have no other non-debug user.
8483 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8484 return false;
8485
8486 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8487
8488 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8489
8490 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8491 // $any:_(<8 x s16>) = G_ANYEXT $bv
8492 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8493 //
8494 // ->
8495 //
8496 // $any:_(s16) = G_ANYEXT $bv[0]
8497 // $any1:_(s16) = G_ANYEXT $bv[1]
8498 // $any2:_(s16) = G_ANYEXT $bv[2]
8499 // $any3:_(s16) = G_ANYEXT $bv[3]
8500 // $any4:_(s16) = G_ANYEXT $bv[4]
8501 // $any5:_(s16) = G_ANYEXT $bv[5]
8502 // $any6:_(s16) = G_ANYEXT $bv[6]
8503 // $any7:_(s16) = G_ANYEXT $bv[7]
8504 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8505 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8506
8507 // We want to unmerge into vectors.
8508 if (!DstTy.isFixedVector())
8509 return false;
8510
     // The unmerge source must be a G_ANYEXT ...
8511 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8512 if (!Any)
8513 return false;
8514
8515 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8516
     // ... whose own source is a G_BUILD_VECTOR.
8517 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8518 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8519
     // The build vector must feed only the anyext (debug uses ignored).
8520 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8521 return false;
8522
8523 // FIXME: check element types?
     // The build-vector sources must split evenly across the unmerge defs.
8524 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8525 return false;
8526
8527 LLT BigBvTy = MRI.getType(BV->getReg(0));
8528 LLT SmallBvTy = DstTy;
8529 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8530
     // NOTE(review): the line that opens this condition (listing line 8531) is
     // absent from this listing; what remains is a query on G_BUILD_VECTOR
     // with {SmallBvTy, SmallBvElemenTy} -- restore from the real source.
8532 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8533 return false;
8534
8535 // We check the legality of scalar anyext.
     // NOTE(review): the opening line of this condition (listing line 8536) is
     // absent from this listing as well.
8537 {TargetOpcode::G_ANYEXT,
8538 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8539 return false;
8540
     // Unmerge and BV are captured by copy as pointers; the callback
     // dereferences them, so both instructions must still exist when it runs.
8541 MatchInfo = [=](MachineIRBuilder &B) {
8542 // Build into each G_UNMERGE_VALUES def
8543 // a small build vector with anyext from the source build vector.
8544 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
     // NOTE(review): `Ops` is used below but its declaration (listing line
     // 8545) is absent from this listing.
8546 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
     // Def I receives source elements [I*N, I*N + N), where N is the element
     // count of SmallBvTy.
8547 Register SourceArray =
8548 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8549 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8550 Ops.push_back(AnyExt.getReg(0));
8551 }
8552 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8553 };
8554 };
8555 return true;
8556 };
8557
8558 return false;
8559}
8560
8562 BuildFnTy &MatchInfo) const {
8563
     // Builds a copy of the shuffle mask in which every index that is
     // >= NumSrcElems (the element count of the first source) is replaced by
     // -1. Returns false when no index needed replacing; otherwise MatchInfo
     // receives a callback that emits the shuffle with the new mask.
8564 bool Changed = false;
8565 auto &Shuffle = cast<GShuffleVector>(MI);
8566 ArrayRef<int> OrigMask = Shuffle.getMask();
8567 SmallVector<int, 16> NewMask;
8568 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
     // A non-vector source is treated as having a single element.
8569 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8570 const unsigned NumDstElts = OrigMask.size();
8571 for (unsigned i = 0; i != NumDstElts; ++i) {
8572 int Idx = OrigMask[i];
8573 if (Idx >= (int)NumSrcElems) {
8574 Idx = -1;
8575 Changed = true;
8576 }
8577 NewMask.push_back(Idx);
8578 }
8579
8580 if (!Changed)
8581 return false;
8582
     // NOTE(review): `[&]` captures MI by reference, so the callback must run
     // while MI is still alive. The closure is not `mutable`, so the captured
     // NewMask is const inside it and the inner std::move cannot actually move.
8583 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8584 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8585 std::move(NewMask));
8586 };
8587
8588 return true;
8589}
8590
8591static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8592 const unsigned MaskSize = Mask.size();
8593 for (unsigned I = 0; I < MaskSize; ++I) {
8594 int Idx = Mask[I];
8595 if (Idx < 0)
8596 continue;
8597
8598 if (Idx < (int)NumElems)
8599 Mask[I] = Idx + NumElems;
8600 else
8601 Mask[I] = Idx - NumElems;
8602 }
8603}
8604
8606 BuildFnTy &MatchInfo) const {
8607
     // Matches a shuffle whose mask reads from exactly one of its two sources
     // and prepares a replacement that uses that source as the first operand
     // and an undef value as the second.
8608 auto &Shuffle = cast<GShuffleVector>(MI);
8609 // If any of the two inputs is already undef, don't check the mask again to
8610 // prevent infinite loop
8611 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8612 return false;
8613
8614 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8615 return false;
8616
8617 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8618 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
     // NOTE(review): the line that opens this condition (listing line 8619) is
     // absent from this listing; what remains is a query on G_SHUFFLE_VECTOR
     // with {DstTy, Src1Ty}.
8620 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8621 return false;
8622
8623 ArrayRef<int> Mask = Shuffle.getMask();
     // NOTE(review): getNumElements() is called without a vector check here;
     // presumably Src1Ty is always a vector at this point -- confirm.
8624 const unsigned NumSrcElems = Src1Ty.getNumElements();
8625
     // Record which of the two sources the non-negative mask entries refer to:
     // indices below NumSrcElems select the first source, the rest the second.
8626 bool TouchesSrc1 = false;
8627 bool TouchesSrc2 = false;
8628 const unsigned NumElems = Mask.size();
8629 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8630 if (Mask[Idx] < 0)
8631 continue;
8632
8633 if (Mask[Idx] < (int)NumSrcElems)
8634 TouchesSrc1 = true;
8635 else
8636 TouchesSrc2 = true;
8637 }
8638
     // Equal flags mean the mask uses both sources or neither; nothing to do.
8639 if (TouchesSrc1 == TouchesSrc2)
8640 return false;
8641
     // When only the second source is used, make it the first operand and
     // rewrite the mask indices accordingly.
8642 Register NewSrc1 = Shuffle.getSrc1Reg();
8643 SmallVector<int, 16> NewMask(Mask);
8644 if (TouchesSrc2) {
8645 NewSrc1 = Shuffle.getSrc2Reg();
8646 commuteMask(NewMask, NumSrcElems);
8647 }
8648
     // Shuffle is captured by reference, everything else by copy; the callback
     // must therefore run while the shuffle instruction is still alive.
8649 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8650 auto Undef = B.buildUndef(Src1Ty);
8651 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8652 };
8653
8654 return true;
8655}
8656
8658 BuildFnTy &MatchInfo) const {
     // Tries to replace a subtract-with-overflow instruction by a plain G_SUB
     // plus a constant carry, using known-bits ranges of both operands to
     // decide the overflow outcome. Returns false when nothing can be
     // concluded.
8659 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8660
8661 Register Dst = Subo->getReg(0);
8662 Register LHS = Subo->getLHSReg();
8663 Register RHS = Subo->getRHSReg();
8664 Register Carry = Subo->getCarryOutReg();
8665 LLT DstTy = MRI.getType(Dst);
8666 LLT CarryTy = MRI.getType(Carry);
8667
8668 // Check legality before known bits.
     // NOTE(review): the second half of this condition (listing line 8670) is
     // absent from this listing.
8669 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8671 return false;
8672
     // Value ranges of both operands derived from known bits, interpreted as
     // signed or unsigned to match the instruction.
8673 ConstantRange KBLHS =
8674 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8675 /* IsSigned=*/Subo->isSigned());
8676 ConstantRange KBRHS =
8677 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8678 /* IsSigned=*/Subo->isSigned());
8679
8680 if (Subo->isSigned()) {
8681 // G_SSUBO
     // NOTE(review): every `case` label of this switch is absent from this
     // listing. Judging by their bodies: the first branch gives up, the second
     // emits a no-signed-wrap sub with carry 0, and the third emits a plain sub
     // with the target's "true" value as carry -- confirm the labels.
8682 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
8684 return false;
8686 MatchInfo = [=](MachineIRBuilder &B) {
8687 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8688 B.buildConstant(Carry, 0);
8689 };
8690 return true;
8691 }
8694 MatchInfo = [=](MachineIRBuilder &B) {
8695 B.buildSub(Dst, LHS, RHS);
8696 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8697 /*isVector=*/CarryTy.isVector(),
8698 /*isFP=*/false));
8699 };
8700 return true;
8701 }
8702 }
8703 return false;
8704 }
8705
8706 // G_USUBO
     // NOTE(review): the `case` labels are absent here too; the branches mirror
     // the signed path but set the no-unsigned-wrap flag instead.
8707 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8709 return false;
8711 MatchInfo = [=](MachineIRBuilder &B) {
8712 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8713 B.buildConstant(Carry, 0);
8714 };
8715 return true;
8716 }
8719 MatchInfo = [=](MachineIRBuilder &B) {
8720 B.buildSub(Dst, LHS, RHS);
8721 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8722 /*isVector=*/CarryTy.isVector(),
8723 /*isFP=*/false));
8724 };
8725 return true;
8726 }
8727 }
8728
8729 return false;
8730}
8731
8732// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
8733// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1) -> (ctls x)
     //
     // On success MatchInfo receives a callback that emits either a bare CTLS
     // into the destination or a CTLS followed by an add of 1.
8735 BuildFnTy &MatchInfo) const {
8736 assert((CtlzMI.getOpcode() == TargetOpcode::G_CTLZ ||
8737 CtlzMI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON) &&
8738 "Expected G_CTLZ variant");
8739
8740 const Register Dst = CtlzMI.getOperand(0).getReg();
8741 Register Src = CtlzMI.getOperand(1).getReg();
8742
8743 LLT Ty = MRI.getType(Dst);
8744 LLT SrcTy = MRI.getType(Src);
8745
     // Only valid scalar result types are handled.
8746 if (!(Ty.isValid() && Ty.isScalar()))
8747 return false;
8748
     // Legalizer information is required for the G_CTLS query below.
8749 if (!LI)
8750 return false;
8751
8752 SmallVector<LLT, 2> QueryTypes = {Ty, SrcTy};
8753 LegalityQuery Query(TargetOpcode::G_CTLS, QueryTypes);
8754
     // NOTE(review): the `case` labels that lead to the `break` (listing lines
     // 8758-8760) are absent from this listing; every other action rejects the
     // combine.
8755 switch (LI->getAction(Query).Action) {
8756 default:
8757 return false;
8761 break;
8762 }
8763
8764 // Src = or(shl(V, 1), 1) -> Src=V; NeedAdd = False
8765 Register V;
8766 bool NeedAdd = true;
     // NOTE(review): the pattern line (listing line 8768) is absent from this
     // listing; per the comment above it should match or(shl(V, 1), 1).
8767 if (mi_match(Src, MRI,
8769 m_SpecificICst(1))))) {
8770 NeedAdd = false;
8771 Src = V;
8772 }
8773
8774 unsigned BitWidth = Ty.getScalarSizeInBits();
8775
8776 Register X;
     // NOTE(review): the pattern lines (listing lines 8778-8779) are absent
     // from this listing; per the header comment they should match
     // xor(X, sra(X, BitWidth - 1)).
8777 if (!mi_match(Src, MRI,
8780 m_SpecificICst(BitWidth - 1)))))))
8781 return false;
8782
8783 MatchInfo = [=](MachineIRBuilder &B) {
     // Second fold: the CTLS result is the final value.
8784 if (!NeedAdd) {
8785 B.buildCTLS(Dst, X);
8786 return;
8787 }
8788
     // First fold: the result is ctls(X) + 1.
8789 auto Ctls = B.buildCTLS(Ty, X);
8790 auto One = B.buildConstant(Ty, 1);
8791
8792 B.buildAdd(Dst, Ctls, One);
8793 };
8794
8795 return true;
8796}
8797
8798// Fold shr ( add ( ext X, ext Y ), 1 ) -> avgfloor ( x, y )
8799// Fold shr ( add ( ext X, ext Y, 1 ), 1 ) -> avgceil ( x, y )
     //
     // Returns true when X and Y have the same type and TargetOpc is legal for
     // that type. MI is only inspected by the assertion below.
     // NOTE(review): the first signature lines (listing lines 8800-8801) are
     // absent from this listing.
8802 unsigned TargetOpc) const {
8803 assert((MI.getOpcode() == TargetOpcode::G_LSHR ||
8804 MI.getOpcode() == TargetOpcode::G_ASHR) &&
8805 "Expected G_LSHR/G_ASHR");
8806
8807 LLT XTy = MRI.getType(X);
8808 return XTy == MRI.getType(Y) && isLegal({TargetOpc, {XTy}});
8809}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may fold its address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
const fltSemantics & getSemantics() const
Definition APFloat.h:1546
bool isNaN() const
Definition APFloat.h:1536
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1294
APInt bitcastToAPInt() const
Definition APFloat.h:1430
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
int32_t exactLogBase2() const
Definition APInt.h:1806
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1084
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1317
bool isMask(unsigned numBits) const
Definition APInt.h:489
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:978
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:755
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:754
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:752
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:753
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:742
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyCombineBuildVectorOfBitcast(MachineInstr &MI, SmallVector< Register > &Ops) const
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRepeatedFPDivisor(MachineInstr &MI, SmallVector< MachineInstr * > &MatchInfo) const
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
void applyPtrAddZero(MachineInstr &MI) const
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands that are defined by equivalent instructions.
void applyUDivOrURemByConst(MachineInstr &MI) const
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const
bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchAVG(MachineInstr &MI, MachineRegisterInfo &MRI, Register X, Register Y, unsigned TargetOpc) const
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchPtrAddZero(MachineInstr &MI) const
}
const TargetInstrInfo * TII
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
void applyCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
const DataLayout & getDataLayout() const
bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
void applyUMulHToLShr(MachineInstr &MI) const
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
bool isLegalOrHasFewerElements(const LegalityQuery &Query) const
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
const TargetLowering & getTargetLowering() const
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
void applySDivByPow2(MachineInstr &MI) const
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
void applyUDivByPow2(MachineInstr &MI) const
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI 's bitwidth.
void applyCombineCopy(MachineInstr &MI) const
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
bool matchSextTruncSextLoad(MachineInstr &MI) const
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) const
bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchCombineCopy(MachineInstr &MI) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchOrShiftToFunnelShift(MachineInstr &MI, bool AllowScalarConstants, BuildFnTy &MatchInfo) const
bool matchRedundantSExtInReg(MachineInstr &MI) const
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
void applyCombineShuffleVector(MachineInstr &MI, ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVMContext & getContext() const
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
bool matchConstantFoldUnaryIntOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Constant fold a unary integer op (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON variants,...
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
bool isLegal(const LegalityQuery &Query) const
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
bool matchOperandIsKnownToBeAPowerOfTwo(const MachineOperand &MO, bool OrNegative=false) const
Check if operand MO is known to be a power of 2.
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate the operands of a commutative binop.
void eraseInst(MachineInstr &MI) const
Erase MI.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given a G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is extend that consumes the result of a load, try to combine it.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyRotateOutOfRange(MachineInstr &MI) const
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchRotateOutOfRange(MachineInstr &MI) const
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifySRemByPow2(MachineInstr &MI) const
Combine G_SREM x, (+/-2^k) to a bias-and-mask sequence.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &UnmergeSrc) const
bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
void applyFunnelShiftToRotate(MachineInstr &MI) const
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyRepeatedFPDivisor(SmallVector< MachineInstr * > &MatchInfo) const
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
bool matchBinopWithNeg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold a bitwiseop (~b +/- c) -> a bitwiseop ~(b -/+ c)
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applySDivOrSRemByConst(MachineInstr &MI) const
MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given a G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
void applyCommuteBinOpOperands(MachineInstr &MI) const
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
void applySextTruncSextLoad(MachineInstr &MI) const
const MachineFunction & getMachineFunction() const
bool matchCombineBuildVectorOfBitcast(MachineInstr &MI, SmallVector< Register > &Ops) const
Combine G_BUILD_VECTOR(G_UNMERGE(G_BITCAST), Undef) to G_BITCAST(G_BUILD_VECTOR(.....
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValue() const
Definition Constants.h:464
const APFloat & getValueAPF() const
Definition Constants.h:463
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:218
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:254
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
static use_instr_nodbg_iterator use_instr_nodbg_end()
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(const APInt &RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1444
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:1984
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:653
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:461
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1404
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1569
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:741
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1527
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1551
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:494
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1584
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1616
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:672
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:305
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1507
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:200
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1437
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:908
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:279
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:447
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1540
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1641
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:469
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI SmallVector< APInt > ConstantFoldUnaryIntOp(unsigned Opcode, LLT DstTy, Register Src, const MachineRegisterInfo &MRI)
Tries to constant fold a unary integer operation (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON vari...
Definition Utils.cpp:945
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:501
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1422
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:229
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:265
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...