LLVM 23.0.0git
CombinerHelper.cpp
Go to the documentation of this file.
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
42#include <cmath>
43#include <optional>
44#include <tuple>
45
46#define DEBUG_TYPE "gi-combiner"
47
48using namespace llvm;
49using namespace MIPatternMatch;
50
51// Option to allow testing of the combiner while no targets know about indexed
52// addressing.
53static cl::opt<bool>
54 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
55 cl::desc("Force all indexed operations to be "
56 "legal for the GlobalISel combiner"));
57
62 const LegalizerInfo *LI)
63 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
65 TII(Builder.getMF().getSubtarget().getInstrInfo()),
66 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
67 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
68 (void)this->VT;
69}
70
72 return *Builder.getMF().getSubtarget().getTargetLowering();
73}
74
76 return Builder.getMF();
77}
78
82
83LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
84
/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  // ByteWidth is only read by the assertion above; keep it "used" when
  // assertions are compiled out.
  (void)ByteWidth;
  // In little endian layout byte I of the value lives at memory offset I.
  return I;
}
93
94/// Determines the LogBase2 value for a non-null input value using the
95/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
97 auto &MRI = *MIB.getMRI();
98 LLT Ty = MRI.getType(V);
99 auto Ctlz = MIB.buildCTLZ(Ty, V);
100 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
101 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
102}
103
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  // Big endian mirrors the byte order: byte 0 sits at the highest offset.
  return ByteWidth - I - 1;
}
112
113/// Given a map from byte offsets in memory to indices in a load/store,
114/// determine if that map corresponds to a little or big endian byte pattern.
115///
116/// \param MemOffset2Idx maps memory offsets to address offsets.
117/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
118///
119/// \returns true if the map corresponds to a big endian byte pattern, false if
120/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
121///
122/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
123/// are as follows:
124///
125/// AddrOffset Little endian Big endian
126/// 0 0 3
127/// 1 1 2
128/// 2 2 1
129/// 3 3 0
130static std::optional<bool>
132 int64_t LowestIdx) {
133 // Need at least two byte positions to decide on endianness.
134 unsigned Width = MemOffset2Idx.size();
135 if (Width < 2)
136 return std::nullopt;
137 bool BigEndian = true, LittleEndian = true;
138 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
139 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
140 if (MemOffsetAndIdx == MemOffset2Idx.end())
141 return std::nullopt;
142 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
143 assert(Idx >= 0 && "Expected non-negative byte offset?");
144 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
145 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
146 if (!BigEndian && !LittleEndian)
147 return std::nullopt;
148 }
149
150 assert((BigEndian != LittleEndian) &&
151 "Pattern cannot be both big and little endian!");
152 return BigEndian;
153}
154
156
157bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
158 assert(LI && "Must have LegalizerInfo to query isLegal!");
159 return LI->getAction(Query).Action == LegalizeActions::Legal;
160}
161
163 const LegalityQuery &Query) const {
164 return isPreLegalize() || isLegal(Query);
165}
166
168 return isLegal(Query) ||
169 LI->getAction(Query).Action == LegalizeActions::WidenScalar;
170}
171
173 const LegalityQuery &Query) const {
174 LegalizeAction Action = LI->getAction(Query).Action;
175 return Action == LegalizeActions::Legal ||
177}
178
180 if (!Ty.isVector())
181 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
182 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
183 if (isPreLegalize())
184 return true;
185 LLT EltTy = Ty.getElementType();
186 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
187 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
188}
189
191 Register ToReg) const {
192 Observer.changingAllUsesOfReg(MRI, FromReg);
193
194 if (MRI.constrainRegAttrs(ToReg, FromReg))
195 MRI.replaceRegWith(FromReg, ToReg);
196 else
197 Builder.buildCopy(FromReg, ToReg);
198
199 Observer.finishedChangingAllUsesOfReg();
200}
201
203 MachineOperand &FromRegOp,
204 Register ToReg) const {
205 assert(FromRegOp.getParent() && "Expected an operand in an MI");
206 Observer.changingInstr(*FromRegOp.getParent());
207
208 FromRegOp.setReg(ToReg);
209
210 Observer.changedInstr(*FromRegOp.getParent());
211}
212
214 unsigned ToOpcode) const {
215 Observer.changingInstr(FromMI);
216
217 FromMI.setDesc(Builder.getTII().get(ToOpcode));
218
219 Observer.changedInstr(FromMI);
220}
221
223 return RBI->getRegBank(Reg, MRI, *TRI);
224}
225
227 const RegisterBank *RegBank) const {
228 if (RegBank)
229 MRI.setRegBank(Reg, *RegBank);
230}
231
233 if (matchCombineCopy(MI)) {
235 return true;
236 }
237 return false;
238}
240 if (MI.getOpcode() != TargetOpcode::COPY)
241 return false;
242 Register DstReg = MI.getOperand(0).getReg();
243 Register SrcReg = MI.getOperand(1).getReg();
244 return canReplaceReg(DstReg, SrcReg, MRI);
245}
247 Register DstReg = MI.getOperand(0).getReg();
248 Register SrcReg = MI.getOperand(1).getReg();
249 replaceRegWith(MRI, DstReg, SrcReg);
250 MI.eraseFromParent();
251}
252
254 MachineInstr &MI, BuildFnTy &MatchInfo) const {
255 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
256 Register DstOp = MI.getOperand(0).getReg();
257 Register OrigOp = MI.getOperand(1).getReg();
258
259 if (!MRI.hasOneNonDBGUse(OrigOp))
260 return false;
261
262 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
263 // Even if only a single operand of the PHI is not guaranteed non-poison,
264 // moving freeze() backwards across a PHI can cause optimization issues for
265 // other users of that operand.
266 //
267 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
268 // the source register is unprofitable because it makes the freeze() more
269 // strict than is necessary (it would affect the whole register instead of
270 // just the subreg being frozen).
271 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
272 return false;
273
274 if (canCreateUndefOrPoison(OrigOp, MRI,
275 /*ConsiderFlagsAndMetadata=*/false))
276 return false;
277
278 std::optional<MachineOperand> MaybePoisonOperand;
279 for (MachineOperand &Operand : OrigDef->uses()) {
280 if (!Operand.isReg())
281 return false;
282
283 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
284 continue;
285
286 if (!MaybePoisonOperand)
287 MaybePoisonOperand = Operand;
288 else {
289 // We have more than one maybe-poison operand. Moving the freeze is
290 // unsafe.
291 return false;
292 }
293 }
294
295 // Eliminate freeze if all operands are guaranteed non-poison.
296 if (!MaybePoisonOperand) {
297 MatchInfo = [=](MachineIRBuilder &B) {
298 Observer.changingInstr(*OrigDef);
299 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
300 Observer.changedInstr(*OrigDef);
301 B.buildCopy(DstOp, OrigOp);
302 };
303 return true;
304 }
305
306 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
307 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
308
309 MatchInfo = [=](MachineIRBuilder &B) mutable {
310 Observer.changingInstr(*OrigDef);
311 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
312 Observer.changedInstr(*OrigDef);
313 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
314 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
316 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
317 Freeze.getReg(0));
318 replaceRegWith(MRI, DstOp, OrigOp);
319 };
320 return true;
321}
322
325 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
326 "Invalid instruction");
327 bool IsUndef = true;
328 MachineInstr *Undef = nullptr;
329
330 // Walk over all the operands of concat vectors and check if they are
331 // build_vector themselves or undef.
332 // Then collect their operands in Ops.
333 for (const MachineOperand &MO : MI.uses()) {
334 Register Reg = MO.getReg();
335 MachineInstr *Def = MRI.getVRegDef(Reg);
336 assert(Def && "Operand not defined");
337 if (!MRI.hasOneNonDBGUse(Reg))
338 return false;
339 switch (Def->getOpcode()) {
340 case TargetOpcode::G_BUILD_VECTOR:
341 IsUndef = false;
342 // Remember the operands of the build_vector to fold
343 // them into the yet-to-build flattened concat vectors.
344 for (const MachineOperand &BuildVecMO : Def->uses())
345 Ops.push_back(BuildVecMO.getReg());
346 break;
347 case TargetOpcode::G_IMPLICIT_DEF: {
348 LLT OpType = MRI.getType(Reg);
349 // Keep one undef value for all the undef operands.
350 if (!Undef) {
351 Builder.setInsertPt(*MI.getParent(), MI);
352 Undef = Builder.buildUndef(OpType.getScalarType());
353 }
354 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
355 OpType.getScalarType() &&
356 "All undefs should have the same type");
357 // Break the undef vector in as many scalar elements as needed
358 // for the flattening.
359 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
360 EltIdx != EltEnd; ++EltIdx)
361 Ops.push_back(Undef->getOperand(0).getReg());
362 break;
363 }
364 default:
365 return false;
366 }
367 }
368
369 // Check if the combine is illegal
370 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
372 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
373 return false;
374 }
375
376 if (IsUndef)
377 Ops.clear();
378
379 return true;
380}
383 // We determined that the concat_vectors can be flattened.
384 // Generate the flattened build_vector.
385 Register DstReg = MI.getOperand(0).getReg();
386 Builder.setInsertPt(*MI.getParent(), MI);
387 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
388
389 // Note: IsUndef is sort of redundant. We could have determined it by
390 // checking that all Ops are undef. Alternatively, we could have
391 // generated a build_vector of undefs and relied on another combine to
392 // clean that up. For now, given we already gather this information
393 // in matchCombineConcatVectors, just save compile time and issue the
394 // right thing.
395 if (Ops.empty())
396 Builder.buildUndef(NewDstReg);
397 else
398 Builder.buildBuildVector(NewDstReg, Ops);
399 replaceRegWith(MRI, DstReg, NewDstReg);
400 MI.eraseFromParent();
401}
402
405 auto &BV = cast<GBuildVector>(MI);
406
407 // Look at the first operand for an unmerge(bitcast) from a scalar type.
408 GUnmerge *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
409 if (!Unmerge || Unmerge->getReg(0) != BV.getSourceReg(0))
410 return false;
411 MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
412 if (BC->getOpcode() != TargetOpcode::G_BITCAST)
413 return false;
414 LLT InputTy = MRI.getType(BC->getOperand(1).getReg());
415 unsigned Factor = Unmerge->getNumDefs();
416 if (!InputTy.isScalar() || BV.getNumSources() % Factor != 0)
417 return false;
418
419 // Check if the build_vector is legal
420 LLT BVDstTy = LLT::fixed_vector(BV.getNumSources() / Factor, InputTy);
421 if (!isLegal({TargetOpcode::G_BUILD_VECTOR, {BVDstTy, InputTy}}))
422 return false;
423
424 // Check all other operands are bitcasts or undef.
425 for (unsigned Idx = 0; Idx < BV.getNumSources(); Idx += Factor) {
426 GUnmerge *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(Idx), MRI);
427 if (!all_of(iota_range<unsigned>(0, Factor, false), [&](unsigned J) {
428 MachineInstr *Src = MRI.getVRegDef(BV.getSourceReg(Idx + J));
429 if (Src->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
430 return true;
431 return Unmerge && BV.getSourceReg(Idx + J) == Unmerge->getReg(J);
432 }))
433 return false;
434 if (!Unmerge)
435 Ops.push_back(0);
436 else {
437 MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
438 if (BC->getOpcode() != TargetOpcode::G_BITCAST ||
439 MRI.getType(BC->getOperand(1).getReg()) != InputTy)
440 return false;
441 Ops.push_back(BC->getOperand(1).getReg());
442 }
443 }
444
445 return true;
446}
447
450 LLT SrcTy = MRI.getType(Ops[0]);
451 // Build undef if any operations require it.
452 Register Undef = 0;
453 for (Register &Op : Ops) {
454 if (!Op) {
455 if (!Undef)
456 Undef = Builder.buildUndef(SrcTy).getReg(0);
457 Op = Undef;
458 }
459 }
460
461 LLT BVDstTy = LLT::fixed_vector(Ops.size(), SrcTy);
462 auto BV = Builder.buildBuildVector(BVDstTy, Ops);
463 Builder.buildBitcast(MI.getOperand(0).getReg(), BV);
464 MI.eraseFromParent();
465}
466
468 auto &Shuffle = cast<GShuffleVector>(MI);
469
470 Register SrcVec1 = Shuffle.getSrc1Reg();
471 Register SrcVec2 = Shuffle.getSrc2Reg();
472 LLT EltTy = MRI.getType(SrcVec1).getElementType();
473 int Width = MRI.getType(SrcVec1).getNumElements();
474
475 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
476 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
477
478 SmallVector<Register> Extracts;
479 // Select only applicable elements from unmerged values.
480 for (int Val : Shuffle.getMask()) {
481 if (Val == -1)
482 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
483 else if (Val < Width)
484 Extracts.push_back(Unmerge1.getReg(Val));
485 else
486 Extracts.push_back(Unmerge2.getReg(Val - Width));
487 }
488 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
489 if (Extracts.size() == 1)
490 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
491 else
492 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
493 MI.eraseFromParent();
494}
495
498 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
499 auto ConcatMI1 =
500 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
501 auto ConcatMI2 =
502 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
503 if (!ConcatMI1 || !ConcatMI2)
504 return false;
505
506 // Check that the sources of the Concat instructions have the same type
507 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
508 MRI.getType(ConcatMI2->getSourceReg(0)))
509 return false;
510
511 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
512 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
513 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
514 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
515 // Check if the index takes a whole source register from G_CONCAT_VECTORS
516 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
517 if (Mask[i] == -1) {
518 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
519 if (i + j >= Mask.size())
520 return false;
521 if (Mask[i + j] != -1)
522 return false;
523 }
525 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
526 return false;
527 Ops.push_back(0);
528 } else if (Mask[i] % ConcatSrcNumElt == 0) {
529 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
530 if (i + j >= Mask.size())
531 return false;
532 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
533 return false;
534 }
535 // Retrieve the source register from its respective G_CONCAT_VECTORS
536 // instruction
537 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
538 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
539 } else {
540 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
541 ConcatMI1->getNumSources()));
542 }
543 } else {
544 return false;
545 }
546 }
547
549 {TargetOpcode::G_CONCAT_VECTORS,
550 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
551 return false;
552
553 return !Ops.empty();
554}
555
558 LLT SrcTy;
559 for (Register &Reg : Ops) {
560 if (Reg != 0)
561 SrcTy = MRI.getType(Reg);
562 }
563 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
564
565 Register UndefReg = 0;
566
567 for (Register &Reg : Ops) {
568 if (Reg == 0) {
569 if (UndefReg == 0)
570 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
571 Reg = UndefReg;
572 }
573 }
574
575 if (Ops.size() > 1)
576 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
577 else
578 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
579 MI.eraseFromParent();
580}
581
584 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
585 "Invalid instruction kind");
586 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
587 Register Src1 = MI.getOperand(1).getReg();
588 LLT SrcType = MRI.getType(Src1);
589
590 unsigned DstNumElts = DstType.getNumElements();
591 unsigned SrcNumElts = SrcType.getNumElements();
592
593 // If the resulting vector is smaller than the size of the source
594 // vectors being concatenated, we won't be able to replace the
595 // shuffle vector into a concat_vectors.
596 //
597 // Note: We may still be able to produce a concat_vectors fed by
598 // extract_vector_elt and so on. It is less clear that would
599 // be better though, so don't bother for now.
600 //
601 // If the destination is a scalar, the size of the sources doesn't
602 // matter. we will lower the shuffle to a plain copy. This will
603 // work only if the source and destination have the same size. But
604 // that's covered by the next condition.
605 //
606 // TODO: If the size between the source and destination don't match
607 // we could still emit an extract vector element in that case.
608 if (DstNumElts < 2 * SrcNumElts)
609 return false;
610
611 // Check that the shuffle mask can be broken evenly between the
612 // different sources.
613 if (DstNumElts % SrcNumElts != 0)
614 return false;
615
616 // Mask length is a multiple of the source vector length.
617 // Check if the shuffle is some kind of concatenation of the input
618 // vectors.
619 unsigned NumConcat = DstNumElts / SrcNumElts;
620 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
621 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
622 for (unsigned i = 0; i != DstNumElts; ++i) {
623 int Idx = Mask[i];
624 // Undef value.
625 if (Idx < 0)
626 continue;
627 // Ensure the indices in each SrcType sized piece are sequential and that
628 // the same source is used for the whole piece.
629 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
630 (ConcatSrcs[i / SrcNumElts] >= 0 &&
631 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
632 return false;
633 // Remember which source this index came from.
634 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
635 }
636
637 // The shuffle is concatenating multiple vectors together.
638 // Collect the different operands for that.
639 Register UndefReg;
640 Register Src2 = MI.getOperand(2).getReg();
641 for (auto Src : ConcatSrcs) {
642 if (Src < 0) {
643 if (!UndefReg) {
644 Builder.setInsertPt(*MI.getParent(), MI);
645 UndefReg = Builder.buildUndef(SrcType).getReg(0);
646 }
647 Ops.push_back(UndefReg);
648 } else if (Src == 0)
649 Ops.push_back(Src1);
650 else
651 Ops.push_back(Src2);
652 }
653 return true;
654}
655
657 ArrayRef<Register> Ops) const {
658 Register DstReg = MI.getOperand(0).getReg();
659 Builder.setInsertPt(*MI.getParent(), MI);
660 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
661
662 if (Ops.size() == 1)
663 Builder.buildCopy(NewDstReg, Ops[0]);
664 else
665 Builder.buildMergeLikeInstr(NewDstReg, Ops);
666
667 replaceRegWith(MRI, DstReg, NewDstReg);
668 MI.eraseFromParent();
669}
670
671namespace {
672
673/// Select a preference between two uses. CurrentUse is the current preference
674/// while *ForCandidate is attributes of the candidate under consideration.
675PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
676 PreferredTuple &CurrentUse,
677 const LLT TyForCandidate,
678 unsigned OpcodeForCandidate,
679 MachineInstr *MIForCandidate) {
680 if (!CurrentUse.Ty.isValid()) {
681 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
682 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
683 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
684 return CurrentUse;
685 }
686
687 // We permit the extend to hoist through basic blocks but this is only
688 // sensible if the target has extending loads. If you end up lowering back
689 // into a load and extend during the legalizer then the end result is
690 // hoisting the extend up to the load.
691
692 // Prefer defined extensions to undefined extensions as these are more
693 // likely to reduce the number of instructions.
694 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
695 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
696 return CurrentUse;
697 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
698 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
699 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
700
701 // Prefer sign extensions to zero extensions as sign-extensions tend to be
702 // more expensive. Don't do this if the load is already a zero-extend load
703 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
704 // later.
705 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
706 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
707 OpcodeForCandidate == TargetOpcode::G_ZEXT)
708 return CurrentUse;
709 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
710 OpcodeForCandidate == TargetOpcode::G_SEXT)
711 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
712 }
713
714 // This is potentially target specific. We've chosen the largest type
715 // because G_TRUNC is usually free. One potential catch with this is that
716 // some targets have a reduced number of larger registers than smaller
717 // registers and this choice potentially increases the live-range for the
718 // larger value.
719 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
720 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
721 }
722 return CurrentUse;
723}
724
725/// Find a suitable place to insert some instructions and insert them. This
726/// function accounts for special cases like inserting before a PHI node.
727/// The current strategy for inserting before PHI's is to duplicate the
728/// instructions for each predecessor. However, while that's ok for G_TRUNC
729/// on most targets since it generally requires no code, other targets/cases may
730/// want to try harder to find a dominating block.
731static void InsertInsnsWithoutSideEffectsBeforeUse(
734 MachineOperand &UseMO)>
735 Inserter) {
736 MachineInstr &UseMI = *UseMO.getParent();
737
738 MachineBasicBlock *InsertBB = UseMI.getParent();
739
740 // If the use is a PHI then we want the predecessor block instead.
741 if (UseMI.isPHI()) {
742 MachineOperand *PredBB = std::next(&UseMO);
743 InsertBB = PredBB->getMBB();
744 }
745
746 // If the block is the same block as the def then we want to insert just after
747 // the def instead of at the start of the block.
748 if (InsertBB == DefMI.getParent()) {
750 Inserter(InsertBB, std::next(InsertPt), UseMO);
751 return;
752 }
753
754 // Otherwise we want the start of the BB
755 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
756}
757} // end anonymous namespace
758
760 PreferredTuple Preferred;
761 if (matchCombineExtendingLoads(MI, Preferred)) {
762 applyCombineExtendingLoads(MI, Preferred);
763 return true;
764 }
765 return false;
766}
767
768static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
769 unsigned CandidateLoadOpc;
770 switch (ExtOpc) {
771 case TargetOpcode::G_ANYEXT:
772 CandidateLoadOpc = TargetOpcode::G_LOAD;
773 break;
774 case TargetOpcode::G_SEXT:
775 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
776 break;
777 case TargetOpcode::G_ZEXT:
778 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
779 break;
780 default:
781 llvm_unreachable("Unexpected extend opc");
782 }
783 return CandidateLoadOpc;
784}
785
787 MachineInstr &MI, PreferredTuple &Preferred) const {
788 // We match the loads and follow the uses to the extend instead of matching
789 // the extends and following the def to the load. This is because the load
790 // must remain in the same position for correctness (unless we also add code
791 // to find a safe place to sink it) whereas the extend is freely movable.
792 // It also prevents us from duplicating the load for the volatile case or just
793 // for performance.
794 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
795 if (!LoadMI)
796 return false;
797
798 Register LoadReg = LoadMI->getDstReg();
799
800 LLT LoadValueTy = MRI.getType(LoadReg);
801 if (!LoadValueTy.isScalar())
802 return false;
803
804 // Most architectures are going to legalize <s8 loads into at least a 1 byte
805 // load, and the MMOs can only describe memory accesses in multiples of bytes.
806 // If we try to perform extload combining on those, we can end up with
807 // %a(s8) = extload %ptr (load 1 byte from %ptr)
808 // ... which is an illegal extload instruction.
809 if (LoadValueTy.getSizeInBits() < 8)
810 return false;
811
812 // For non power-of-2 types, they will very likely be legalized into multiple
813 // loads. Don't bother trying to match them into extending loads.
815 return false;
816
817 // Find the preferred type aside from the any-extends (unless it's the only
818 // one) and non-extending ops. We'll emit an extending load to that type and
819 // emit a variant of (extend (trunc X)) for the others according to the
820 // relative type sizes. At the same time, pick an extend to use based on the
821 // extend involved in the chosen type.
822 unsigned PreferredOpcode =
823 isa<GLoad>(&MI)
824 ? TargetOpcode::G_ANYEXT
825 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
826 Preferred = {LLT(), PreferredOpcode, nullptr};
827 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
828 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
829 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
830 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
831 const auto &MMO = LoadMI->getMMO();
832 // Don't do anything for atomics.
833 if (MMO.isAtomic())
834 continue;
835 // Check for legality.
836 if (!isPreLegalize()) {
837 LegalityQuery::MemDesc MMDesc(MMO);
838 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
839 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
840 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
841 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
842 .Action != LegalizeActions::Legal)
843 continue;
844 }
845 Preferred = ChoosePreferredUse(MI, Preferred,
846 MRI.getType(UseMI.getOperand(0).getReg()),
847 UseMI.getOpcode(), &UseMI);
848 }
849 }
850
851 // There were no extends
852 if (!Preferred.MI)
853 return false;
854 // It should be impossible to choose an extend without selecting a different
855 // type since by definition the result of an extend is larger.
856 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
857
858 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
859 return true;
860}
861
863 MachineInstr &MI, PreferredTuple &Preferred) const {
864 // Rewrite the load to the chosen extending load.
865 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
866
867 // Inserter to insert a truncate back to the original type at a given point
868 // with some basic CSE to limit truncate duplication to one per BB.
870 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
871 MachineBasicBlock::iterator InsertBefore,
872 MachineOperand &UseMO) {
873 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
874 if (PreviouslyEmitted) {
875 Observer.changingInstr(*UseMO.getParent());
876 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
877 Observer.changedInstr(*UseMO.getParent());
878 return;
879 }
880
881 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
882 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
883 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
884 EmittedInsns[InsertIntoBB] = NewMI;
885 replaceRegOpWith(MRI, UseMO, NewDstReg);
886 };
887
888 Observer.changingInstr(MI);
889 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
890 MI.setDesc(Builder.getTII().get(LoadOpc));
891
892 // Rewrite all the uses to fix up the types.
893 auto &LoadValue = MI.getOperand(0);
895 llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));
896
897 for (auto *UseMO : Uses) {
898 MachineInstr *UseMI = UseMO->getParent();
899
900 // If the extend is compatible with the preferred extend then we should fix
901 // up the type and extend so that it uses the preferred use.
902 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
903 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
904 Register UseDstReg = UseMI->getOperand(0).getReg();
905 MachineOperand &UseSrcMO = UseMI->getOperand(1);
906 const LLT UseDstTy = MRI.getType(UseDstReg);
907 if (UseDstReg != ChosenDstReg) {
908 if (Preferred.Ty == UseDstTy) {
909 // If the use has the same type as the preferred use, then merge
910 // the vregs and erase the extend. For example:
911 // %1:_(s8) = G_LOAD ...
912 // %2:_(s32) = G_SEXT %1(s8)
913 // %3:_(s32) = G_ANYEXT %1(s8)
914 // ... = ... %3(s32)
915 // rewrites to:
916 // %2:_(s32) = G_SEXTLOAD ...
917 // ... = ... %2(s32)
918 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
919 Observer.erasingInstr(*UseMO->getParent());
920 UseMO->getParent()->eraseFromParent();
921 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
922 // If the preferred size is smaller, then keep the extend but extend
923 // from the result of the extending load. For example:
924 // %1:_(s8) = G_LOAD ...
925 // %2:_(s32) = G_SEXT %1(s8)
926 // %3:_(s64) = G_ANYEXT %1(s8)
927 // ... = ... %3(s64)
928 // rewrites to:
929 // %2:_(s32) = G_SEXTLOAD ...
930 // %3:_(s64) = G_ANYEXT %2:_(s32)
931 // ... = ... %3(s64)
932 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
933 } else {
934 // If the preferred size is larger, then insert a truncate. For
935 // example:
936 // %1:_(s8) = G_LOAD ...
937 // %2:_(s64) = G_SEXT %1(s8)
938 // %3:_(s32) = G_ZEXT %1(s8)
939 // ... = ... %3(s32)
940 // rewrites to:
941 // %2:_(s64) = G_SEXTLOAD ...
942 // %4:_(s8) = G_TRUNC %2:_(s64)
943 // %3:_(s32) = G_ZEXT %4:_(s8)
944 // ... = ... %3(s32)
945 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
946 InsertTruncAt);
947 }
948 continue;
949 }
950 // The use is (one of) the uses of the preferred use we chose earlier.
951 // We're going to update the load to def this value later so just erase
952 // the old extend.
953 Observer.erasingInstr(*UseMO->getParent());
954 UseMO->getParent()->eraseFromParent();
955 continue;
956 }
957
958 // The use isn't an extend. Truncate back to the type we originally loaded.
959 // This is free on many targets.
960 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
961 }
962
963 MI.getOperand(0).setReg(ChosenDstReg);
964 Observer.changedInstr(MI);
965}
966
968 BuildFnTy &MatchInfo) const {
     // Matcher for a scalar G_AND that masks a load result with a constant
     // low-bit mask. On success MatchInfo is set to a callback that builds the
     // replacement G_ZEXTLOAD and true is returned; otherwise returns false.
969 assert(MI.getOpcode() == TargetOpcode::G_AND);
970
971 // If we have the following code:
972 // %mask = G_CONSTANT 255
973 // %ld = G_LOAD %ptr, (load s16)
974 // %and = G_AND %ld, %mask
975 //
976 // Try to fold it into
977 // %ld = G_ZEXTLOAD %ptr, (load s8)
978
979 Register Dst = MI.getOperand(0).getReg();
980 if (MRI.getType(Dst).isVector())
981 return false;
982
983 auto MaybeMask =
984 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
985 if (!MaybeMask)
986 return false;
987
988 APInt MaskVal = MaybeMask->Value;
989
990 if (!MaskVal.isMask())
991 return false;
992
993 Register SrcReg = MI.getOperand(1).getReg();
994 // Don't use getOpcodeDef() here since intermediate instructions may have
995 // multiple users.
996 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
997 if (!LoadMI)
998 return false;
999
1000 Register LoadReg = LoadMI->getDstReg();
1001 LLT RegTy = MRI.getType(LoadReg);
1002 Register PtrReg = LoadMI->getPointerReg();
1003 unsigned RegSize = RegTy.getSizeInBits();
1004 unsigned LoadSizeBits = LoadMI->getMemSizeInBits().getValue();
1005 unsigned MaskSizeBits = MaskVal.countr_one();
1006
     // A sign-extending load, or a mask narrower than the memory access, is
     // only rewritten when the load result has a single non-debug use.
1007 if ((isa<GSExtLoad>(LoadMI) || MaskSizeBits < LoadSizeBits) &&
1008 !MRI.hasOneNonDBGUse(LoadReg))
1009 return false;
1010
1011 // The mask may not be larger than the in-memory type, as it might cover sign
1012 // extended bits
1013 if (MaskSizeBits > LoadSizeBits)
1014 return false;
1015
1016 // If the mask covers the whole destination register, there's nothing to
1017 // extend
1018 if (MaskSizeBits >= RegSize)
1019 return false;
1020
1021 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
1022 // at least byte loads. Avoid creating such loads here
1023 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
1024 return false;
1025
1026 const MachineMemOperand &MMO = LoadMI->getMMO();
1027 LegalityQuery::MemDesc MemDesc(MMO);
1028
1029 // Don't modify the memory access size if this is atomic/volatile, but we can
1030 // still adjust the opcode to indicate the high bit behavior.
1031 if (LoadMI->isSimple())
1032 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
1033 else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
1034 return false;
1035
1036 // TODO: Could check if it's legal with the reduced or original memory size.
     // NOTE(review): the line that opens this legality condition (listing line
     // 1037) is not present in this listing.
1038 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
1039 return false;
1040
     // Deferred rewrite: at the old load's position, build a G_ZEXTLOAD that
     // defines Dst with a memory operand of type MemDesc.MemoryTy, redirect the
     // old load's register to Dst, and erase the old load.
1041 MatchInfo = [=](MachineIRBuilder &B) {
1042 B.setInstrAndDebugLoc(*LoadMI);
1043 auto &MF = B.getMF();
1044 auto PtrInfo = MMO.getPointerInfo();
1045 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
1046 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
1047 replaceRegWith(MRI, LoadReg, Dst);
1048 LoadMI->eraseFromParent();
1049 };
1050 return true;
1051}
1052
1054 const MachineInstr &UseMI) const {
     // Returns true when DefMI is met no later than UseMI in a forward scan of
     // their shared basic block (including when both are the same
     // instruction). Both must be non-debug instructions in the same block.
1055 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1056 "shouldn't consider debug uses");
1057 assert(DefMI.getParent() == UseMI.getParent());
1058 if (&DefMI == &UseMI)
1059 return true;
1060 const MachineBasicBlock &MBB = *DefMI.getParent();
     // Linear scan of the block: whichever of the two instructions is found
     // first decides the answer.
1061 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
1062 return &MI == &DefMI || &MI == &UseMI;
1063 });
1064 if (DefOrUse == MBB.end())
1065 llvm_unreachable("Block must contain both DefMI and UseMI!");
1066 return &*DefOrUse == &DefMI;
1067}
1068
1070 const MachineInstr &UseMI) const {
     // Returns whether DefMI dominates UseMI. The query is answered by MDT when
     // it is set; without it, instructions in different blocks are reported as
     // not dominating and same-block pairs are ordered by isPredecessor().
1071 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1072 "shouldn't consider debug uses");
1073 if (MDT)
1074 return MDT->dominates(&DefMI, &UseMI);
1075 else if (DefMI.getParent() != UseMI.getParent())
1076 return false;
1077
1078 return isPredecessor(DefMI, UseMI);
1079}
1080
     // Matches a scalar G_SEXT_INREG whose source is, optionally through one
     // G_TRUNC, defined by a GSExtLoad whose memory width equals the
     // G_SEXT_INREG width. Returns true only in that case.
1082 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1083 Register SrcReg = MI.getOperand(1).getReg();
1084 Register LoadUser = SrcReg;
1085
1086 if (MRI.getType(SrcReg).isVector())
1087 return false;
1088
     // Look through a truncate so the load feeding it can be inspected.
1089 Register TruncSrc;
1090 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1091 LoadUser = TruncSrc;
1092
1093 uint64_t SizeInBits = MI.getOperand(2).getImm();
1094 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1095 // need any extend at all, just a truncate.
1096 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1097 // If truncating more than the original extended value, abort.
1098 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1099 if (TruncSrc &&
1100 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1101 return false;
1102 if (LoadSizeBits == SizeInBits)
1103 return true;
1104 }
1105 return false;
1106}
1107
     // Replaces the G_SEXT_INREG with a copy of its source operand into its
     // destination register, then erases the G_SEXT_INREG.
1109 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1110 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1111 MI.eraseFromParent();
1112}
1113
1115 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
     // Matches a scalar G_SEXT_INREG whose source register is defined directly
     // by a GLoad that may be turned into a G_SEXTLOAD. On success MatchInfo
     // holds (load destination register, new memory width in bits).
1116 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1117
1118 Register DstReg = MI.getOperand(0).getReg();
1119 LLT RegTy = MRI.getType(DstReg);
1120
1121 // Only supports scalars for now.
1122 if (RegTy.isVector())
1123 return false;
1124
1125 Register SrcReg = MI.getOperand(1).getReg();
1126 auto *LoadDef = dyn_cast<GLoad>(MRI.getVRegDef(SrcReg));
1127 if (!LoadDef)
1128 return false;
1129
1130 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1131 uint64_t ExtFrom = MI.getOperand(2).getImm();
1132
     // Shrinking the access below the loaded width is only attempted when the
     // loaded value has a single non-debug use.
1133 if (MemBits > ExtFrom && !MRI.hasOneNonDBGUse(SrcReg))
1134 return false;
1135
1136 // If the sign extend extends from a narrower width than the load's width,
1137 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1138 // Avoid widening the load at all.
1139 unsigned NewSizeBits = std::min(ExtFrom, MemBits);
1140
1141 // Don't generate G_SEXTLOADs with a < 1 byte width.
1142 if (NewSizeBits < 8)
1143 return false;
1144 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1145 // anyway for most targets.
1146 if (!isPowerOf2_32(NewSizeBits))
1147 return false;
1148
1149 const MachineMemOperand &MMO = LoadDef->getMMO();
1150 LegalityQuery::MemDesc MMDesc(MMO);
1151
1152 // Don't modify the memory access size if this is atomic/volatile, but we can
1153 // still adjust the opcode to indicate the high bit behavior.
1154 if (LoadDef->isSimple())
1155 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1156 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1157 return false;
1158
1159 // TODO: Could check if it's legal with the reduced or original memory size.
1160 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1161 {MRI.getType(LoadDef->getDstReg()),
1162 MRI.getType(LoadDef->getPointerReg())},
1163 {MMDesc}}))
1164 return false;
1165
1166 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1167 return true;
1168}
1169
1171 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
     // Builds a G_SEXTLOAD at the position of the load named in MatchInfo,
     // defining MI's destination register, then erases both MI and that load.
     // MatchInfo is (register defined by the load, new width in bits).
1172 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1173 Register LoadReg;
1174 unsigned ScalarSizeBits;
1175 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1176 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1177
1178 // If we have the following:
1179 // %ld = G_LOAD %ptr, (load 2)
1180 // %ext = G_SEXT_INREG %ld, 8
1181 // ==>
1182 // %ld = G_SEXTLOAD %ptr (load 1)
1183
1184 auto &MMO = LoadDef->getMMO();
1185 Builder.setInstrAndDebugLoc(*LoadDef);
1186 auto &MF = Builder.getMF();
1187 auto PtrInfo = MMO.getPointerInfo();
     // The bit width is divided by 8 here, i.e. the new memory operand is
     // presumably sized in bytes -- TODO confirm against the overload used.
1188 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1189 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1190 LoadDef->getPointerReg(), *NewMMO);
1191 replaceRegWith(MRI, LoadReg, MI.getOperand(0).getReg());
1192 MI.eraseFromParent();
1193
1194 // Not all loads can be deleted, so make sure the old one is removed.
1195 LoadDef->eraseFromParent();
1196}
1197
1198 /// Return true if 'MI' is a load or a store that may fold its address
1199 /// operand into the load / store addressing mode.
1201 MachineRegisterInfo &MRI) {
     // NOTE(review): the declaration of AM (listing line 1202) is not present
     // in this listing.
1203 auto *MF = MI->getMF();
     // Only a pointer defined by a G_PTR_ADD is a folding candidate.
1204 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1205 if (!Addr)
1206 return false;
1207
     // Describe the address as base+immediate when the offset is a known
     // constant, and as base+register (scale 1) otherwise.
1208 AM.HasBaseReg = true;
1209 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1210 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1211 else
1212 AM.Scale = 1; // [reg +/- reg]
1213
     // The target decides, for the access's memory type and address space.
1214 return TLI.isLegalAddressingMode(
1215 MF->getDataLayout(), AM,
1216 getTypeForLLT(MI->getMMO().getMemoryType(),
1217 MF->getFunction().getContext()),
1218 MI->getMMO().getAddrSpace());
1219}
1220
1221static unsigned getIndexedOpc(unsigned LdStOpc) {
1222 switch (LdStOpc) {
1223 case TargetOpcode::G_LOAD:
1224 return TargetOpcode::G_INDEXED_LOAD;
1225 case TargetOpcode::G_STORE:
1226 return TargetOpcode::G_INDEXED_STORE;
1227 case TargetOpcode::G_ZEXTLOAD:
1228 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1229 case TargetOpcode::G_SEXTLOAD:
1230 return TargetOpcode::G_INDEXED_SEXTLOAD;
1231 default:
1232 llvm_unreachable("Unexpected opcode");
1233 }
1234}
1235
/// Returns whether the indexed form of \p LdSt (see getIndexedOpc) is reported
/// as legal by isLegal() for the register, pointer and memory types of
/// \p LdSt.
1236 bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1237 // Check for legality.
1238 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1239 LLT Ty = MRI.getType(LdSt.getReg(0));
1240 LLT MemTy = LdSt.getMMO().getMemoryType();
     // NOTE(review): the opening and the tail of the MemDescrs initializer
     // (listing lines 1241 and 1243) are not present in this listing.
1242 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1244 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
     // The type list of the query depends on the opcode: three entries for an
     // indexed store, two for the indexed loads.
1245 SmallVector<LLT> OpTys;
1246 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1247 OpTys = {PtrTy, Ty, Ty};
1248 else
1249 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1250
1251 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1252 return isLegal(Q);
1253}
1254
1256 "post-index-use-threshold", cl::Hidden, cl::init(32),
     // Hidden option with a default of 32. Presumably this is the
     // PostIndexUseThreshold limit that findPostIndexCandidate compares its
     // use counter against; the declaration line is not part of this listing.
1257 cl::desc("Number of uses of a base pointer to check before it is no longer "
1258 "considered for post-indexing."));
1259
/// Searches the non-debug users of \p LdSt's pointer for a G_PTR_ADD that can
/// be folded into \p LdSt as a post-index update.
///
/// On success returns true with \p Addr set to the G_PTR_ADD result, Base to
/// its base register and Offset to its offset register; \p RematOffset is true
/// when the offset is a G_CONSTANT that does not dominate \p LdSt. Offset and
/// \p RematOffset may have been overwritten even when false is returned.
/// (Base and Offset are declared on a signature line missing from this
/// listing.)
1260 bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1262 bool &RematOffset) const {
1263 // We're looking for the following pattern, for either load or store:
1264 // %baseptr:_(p0) = ...
1265 // G_STORE %val(s64), %baseptr(p0)
1266 // %offset:_(s64) = G_CONSTANT i64 -256
1267 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1268 const auto &TLI = getTargetLowering();
1269
1270 Register Ptr = LdSt.getPointerReg();
1271 // If the store is the only use, don't bother.
1272 if (MRI.hasOneNonDBGUse(Ptr))
1273 return false;
1274
1275 if (!isIndexedLoadStoreLegal(LdSt))
1276 return false;
1277
     // A pointer defined by G_FRAME_INDEX is never post-indexed.
1278 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1279 return false;
1280
1281 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1282 auto *PtrDef = MRI.getVRegDef(Ptr);
1283
1284 unsigned NumUsesChecked = 0;
1285 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1286 if (++NumUsesChecked > PostIndexUseThreshold)
1287 return false; // Try to avoid exploding compile time.
1288
1289 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1290 // The use itself might be dead. This can happen during combines if DCE
1291 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1292 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1293 continue;
1294
1295 // Check the user of this isn't the store, otherwise we'd generate an
1296 // indexed store defining its own use.
1297 if (StoredValDef == &Use)
1298 continue;
1299
1300 Offset = PtrAdd->getOffsetReg();
1301 if (!ForceLegalIndexing &&
1302 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1303 /*IsPre*/ false, MRI))
1304 continue;
1305
1306 // Make sure the offset calculation is before the potentially indexed op.
1307 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1308 RematOffset = false;
1309 if (!dominates(*OffsetDef, LdSt)) {
1310 // If the offset however is just a G_CONSTANT, we can always just
1311 // rematerialize it where we need it.
1312 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1313 continue;
1314 RematOffset = true;
1315 }
1316
     // Inspect every other user of the base pointer; any of the checks below
     // rejects the whole search, not just this G_PTR_ADD.
1317 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1318 if (&BasePtrUse == PtrDef)
1319 continue;
1320
1321 // If the user is a later load/store that can be post-indexed, then don't
1322 // combine this one.
1323 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1324 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1325 dominates(LdSt, *BasePtrLdSt) &&
1326 isIndexedLoadStoreLegal(*BasePtrLdSt))
1327 return false;
1328
1329 // Now we're looking for the key G_PTR_ADD instruction, which contains
1330 // the offset add that we want to fold.
1331 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1332 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1333 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1334 // If the use is in a different block, then we may produce worse code
1335 // due to the extra register pressure.
1336 if (BaseUseUse.getParent() != LdSt.getParent())
1337 return false;
1338
     // A load/store user that can fold the add into its own addressing mode
     // rejects the candidate.
1339 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1340 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1341 return false;
1342 }
1343 if (!dominates(LdSt, BasePtrUse))
1344 return false; // All use must be dominated by the load/store.
1345 }
1346 }
1347
1348 Addr = PtrAdd->getReg(0);
1349 Base = PtrAdd->getBaseReg();
1350 return true;
1351 }
1352
1353 return false;
1354}
1355
1356bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1357 Register &Base,
1358 Register &Offset) const {
1359 auto &MF = *LdSt.getParent()->getParent();
1360 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1361
1362 Addr = LdSt.getPointerReg();
1363 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1364 MRI.hasOneNonDBGUse(Addr))
1365 return false;
1366
1367 if (!ForceLegalIndexing &&
1368 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1369 return false;
1370
1371 if (!isIndexedLoadStoreLegal(LdSt))
1372 return false;
1373
1374 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1375 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1376 return false;
1377
1378 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1379 // Would require a copy.
1380 if (Base == St->getValueReg())
1381 return false;
1382
1383 // We're expecting one use of Addr in MI, but it could also be the
1384 // value stored, which isn't actually dominated by the instruction.
1385 if (St->getValueReg() == Addr)
1386 return false;
1387 }
1388
1389 // Avoid increasing cross-block register pressure.
1390 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1391 if (AddrUse.getParent() != LdSt.getParent())
1392 return false;
1393
1394 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1395 // That might allow us to end base's liveness here by adjusting the constant.
1396 bool RealUse = false;
1397 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1398 if (!dominates(LdSt, AddrUse))
1399 return false; // All use must be dominated by the load/store.
1400
1401 // If Ptr may be folded in addressing mode of other use, then it's
1402 // not profitable to do this transformation.
1403 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1404 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1405 RealUse = true;
1406 } else {
1407 RealUse = true;
1408 }
1409 }
1410 return RealUse;
1411}
1412
1414 MachineInstr &MI, BuildFnTy &MatchInfo) const {
     // Matches a G_EXTRACT_VECTOR_ELT whose vector operand comes from a simple
     // GLoad in the same block with no other non-debug user. On success
     // MatchInfo is set to a callback that loads just the extracted element
     // through a computed element pointer and erases the vector load.
1415 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1416
1417 // Check if there is a load that defines the vector being extracted from.
1418 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1419 if (!LoadMI)
1420 return false;
1421
1422 Register Vector = MI.getOperand(1).getReg();
1423 LLT VecEltTy = MRI.getType(Vector).getElementType();
1424
1425 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1426
1427 // Checking whether we should reduce the load width.
1428 if (!MRI.hasOneNonDBGUse(Vector))
1429 return false;
1430
1431 // Check if the defining load is simple.
1432 if (!LoadMI->isSimple())
1433 return false;
1434
1435 // If the vector element type is not a multiple of a byte then we are unable
1436 // to correctly compute an address to load only the extracted element as a
1437 // scalar.
1438 if (!VecEltTy.isByteSized())
1439 return false;
1440
1441 // Check for load fold barriers between the extraction and the load.
1442 if (MI.getParent() != LoadMI->getParent())
1443 return false;
     // The scan between the load and the extract is capped at MaxIter steps;
     // the match is abandoned when the cap is reached.
1444 const unsigned MaxIter = 20;
1445 unsigned Iter = 0;
1446 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1447 if (II->isLoadFoldBarrier())
1448 return false;
1449 if (Iter++ == MaxIter)
1450 return false;
1451 }
1452
1453 // Check if the new load that we are going to create is legal
1454 // if we are in the post-legalization phase.
1455 MachineMemOperand MMO = LoadMI->getMMO();
1456 Align Alignment = MMO.getAlign();
1457 MachinePointerInfo PtrInfo;
     // NOTE(review): the declaration of Offset (listing line 1458) is not
     // present in this listing.
1459
1460 // Finding the appropriate PtrInfo if offset is a known constant.
1461 // This is required to create the memory operand for the narrowed load.
1462 // This machine memory operand object helps us infer about legality
1463 // before we proceed to combine the instruction.
     // NOTE(review): this queries Vector, which was just shown to be defined by
     // a load, so the constant branch looks unreachable; the extract index
     // (operand 2) appears to be the intended register -- confirm.
1464 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1465 int Elt = CVal->getZExtValue();
1466 // FIXME: should be (ABI size)*Elt.
1467 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1468 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1469 } else {
1470 // Discard the pointer info except the address space because the memory
1471 // operand can't represent this new access since the offset is variable.
1472 Offset = VecEltTy.getSizeInBits() / 8;
     // NOTE(review): the PtrInfo assignment for this branch (listing line 1473)
     // is not present in this listing.
1474 }
1475
1476 Alignment = commonAlignment(Alignment, Offset);
1477
1478 Register VecPtr = LoadMI->getPointerReg();
1479 LLT PtrTy = MRI.getType(VecPtr);
1480
1481 MachineFunction &MF = *MI.getMF();
1482 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1483
1484 LegalityQuery::MemDesc MMDesc(*NewMMO);
1485
     // NOTE(review): the opening of this legality condition (listing line 1486)
     // is not present in this listing.
1487 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1488 return false;
1489
1490 // Load must be allowed and fast on the target.
     // NOTE(review): the declaration of C (listing line 1491) is not present in
     // this listing.
1492 auto &DL = MF.getDataLayout();
1493 unsigned Fast = 0;
1494 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1495 &Fast) ||
1496 !Fast)
1497 return false;
1498
1499 Register Result = MI.getOperand(0).getReg();
1500 Register Index = MI.getOperand(2).getReg();
1501
     // Deferred rewrite; a LegalizerHelper with a dummy observer is created
     // only to reuse its getVectorElementPointer().
1502 MatchInfo = [=](MachineIRBuilder &B) {
1503 GISelObserverWrapper DummyObserver;
1504 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1505 // Get pointer to the vector element.
1506 Register finalPtr = Helper.getVectorElementPointer(
1507 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1508 Index);
1509 // New G_LOAD instruction.
1510 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1511 // Remove original GLOAD instruction.
1512 LoadMI->eraseFromParent();
1513 };
1514
1515 return true;
1516}
1517
1519 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
     // Matches a non-atomic load/store that can become an indexed operation.
     // Pre-indexing is tried first; post-indexing is only tried when that
     // fails. MatchInfo.IsPre records which form was found.
1520 auto &LdSt = cast<GLoadStore>(MI);
1521
1522 if (LdSt.isAtomic())
1523 return false;
1524
1525 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1526 MatchInfo.Offset);
1527 if (!MatchInfo.IsPre &&
1528 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1529 MatchInfo.Offset, MatchInfo.RematOffset))
1530 return false;
1531
1532 return true;
1533}
1534
1536 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
     // Builds the indexed form of MI from MatchInfo (Addr, Base, Offset, IsPre,
     // RematOffset), copies MI's memory references onto it, then erases MI and
     // the instruction that defined MatchInfo.Addr.
1537 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1538 unsigned Opcode = MI.getOpcode();
1539 bool IsStore = Opcode == TargetOpcode::G_STORE;
1540 unsigned NewOpcode = getIndexedOpc(Opcode);
1541
1542 // If the offset constant didn't happen to dominate the load/store, we can
1543 // just clone it as needed.
1544 if (MatchInfo.RematOffset) {
1545 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1546 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1547 *OldCst->getOperand(1).getCImm());
1548 MatchInfo.Offset = NewCst.getReg(0);
1549 }
1550
     // Operand order: a store defines only the written-back address and uses
     // the stored value; a load defines the loaded value and then the address.
1551 auto MIB = Builder.buildInstr(NewOpcode);
1552 if (IsStore) {
1553 MIB.addDef(MatchInfo.Addr);
1554 MIB.addUse(MI.getOperand(0).getReg());
1555 } else {
1556 MIB.addDef(MI.getOperand(0).getReg());
1557 MIB.addDef(MatchInfo.Addr);
1558 }
1559
1560 MIB.addUse(MatchInfo.Base);
1561 MIB.addUse(MatchInfo.Offset);
1562 MIB.addImm(MatchInfo.IsPre);
1563 MIB->cloneMemRefs(*MI.getMF(), MI);
1564 MI.eraseFromParent();
1565 AddrDef.eraseFromParent();
1566
1567 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
1568}
1569
1571 MachineInstr *&OtherMI) const {
     // Matches a G_[SU]DIV or G_[SU]REM that has a sibling of the complementary
     // kind in the same block with equal operands. On success OtherMI points at
     // that sibling. Any other opcode is unreachable.
1572 unsigned Opcode = MI.getOpcode();
1573 bool IsDiv, IsSigned;
1574
1575 switch (Opcode) {
1576 default:
1577 llvm_unreachable("Unexpected opcode!");
1578 case TargetOpcode::G_SDIV:
1579 case TargetOpcode::G_UDIV: {
1580 IsDiv = true;
1581 IsSigned = Opcode == TargetOpcode::G_SDIV;
1582 break;
1583 }
1584 case TargetOpcode::G_SREM:
1585 case TargetOpcode::G_UREM: {
1586 IsDiv = false;
1587 IsSigned = Opcode == TargetOpcode::G_SREM;
1588 break;
1589 }
1590 }
1591
1592 Register Src1 = MI.getOperand(1).getReg();
1593 unsigned DivOpcode, RemOpcode, DivremOpcode;
1594 if (IsSigned) {
1595 DivOpcode = TargetOpcode::G_SDIV;
1596 RemOpcode = TargetOpcode::G_SREM;
1597 DivremOpcode = TargetOpcode::G_SDIVREM;
1598 } else {
1599 DivOpcode = TargetOpcode::G_UDIV;
1600 RemOpcode = TargetOpcode::G_UREM;
1601 DivremOpcode = TargetOpcode::G_UDIVREM;
1602 }
1603
1604 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1605 return false;
1606
1607 // Combine:
1608 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1609 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1610 // into:
1611 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1612
1613 // Combine:
1614 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1615 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1616 // into:
1617 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1618
     // Search the non-debug users of the first source operand for the sibling;
     // the first one that satisfies all conditions wins.
1619 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1620 if (MI.getParent() == UseMI.getParent() &&
1621 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1622 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1623 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1624 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1625 OtherMI = &UseMI;
1626 return true;
1627 }
1628 }
1629
1630 return false;
1631}
1632
1634 MachineInstr *&OtherMI) const {
     // Replaces MI and OtherMI (one division, one remainder) with a single
     // G_[SU]DIVREM that defines both results, then erases both originals.
1635 unsigned Opcode = MI.getOpcode();
1636 assert(OtherMI && "OtherMI shouldn't be empty.");
1637
     // Work out which instruction supplies the quotient register and which
     // the remainder register.
1638 Register DestDivReg, DestRemReg;
1639 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1640 DestDivReg = MI.getOperand(0).getReg();
1641 DestRemReg = OtherMI->getOperand(0).getReg();
1642 } else {
1643 DestDivReg = OtherMI->getOperand(0).getReg();
1644 DestRemReg = MI.getOperand(0).getReg();
1645 }
1646
1647 bool IsSigned =
1648 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1649
1650 // Check which instruction is first in the block so we don't break def-use
1651 // deps by "moving" the instruction incorrectly. Also keep track of which
1652 // instruction is first so we pick its operands, avoiding use-before-def
1653 // bugs.
1654 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1655 Builder.setInstrAndDebugLoc(*FirstInst);
1656
1657 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1658 : TargetOpcode::G_UDIVREM,
1659 {DestDivReg, DestRemReg},
1660 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1661 MI.eraseFromParent();
1662 OtherMI->eraseFromParent();
1663}
1664
1666 MachineInstr &MI, MachineInstr *&BrCond) const {
     // Matches a G_BR directly preceded by a G_BRCOND whose target is the
     // layout successor of this block and differs from the G_BR target.
     // BrCond is set to the instruction before the G_BR once one exists, even
     // when the match then fails.
1667 assert(MI.getOpcode() == TargetOpcode::G_BR);
1668
1669 // Try to match the following:
1670 // bb1:
1671 // G_BRCOND %c1, %bb2
1672 // G_BR %bb3
1673 // bb2:
1674 // ...
1675 // bb3:
1676
1677 // The above pattern does not have a fall through to the successor bb2, always
1678 // resulting in a branch no matter which path is taken. Here we try to find
1679 // and replace that pattern with conditional branch to bb3 and otherwise
1680 // fallthrough to bb2. This is generally better for branch predictors.
1681
1682 MachineBasicBlock *MBB = MI.getParent();
     // NOTE(review): the declaration of BrIt (listing line 1683) is not present
     // in this listing.
1684 if (BrIt == MBB->begin())
1685 return false;
1686 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1687
1688 BrCond = &*std::prev(BrIt);
1689 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1690 return false;
1691
1692 // Check that the next block is the conditional branch target. Also make sure
1693 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1694 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1695 return BrCondTarget != MI.getOperand(0).getMBB() &&
1696 MBB->isLayoutSuccessor(BrCondTarget);
1697}
1698
1700 MachineInstr &MI, MachineInstr *&BrCond) const {
     // Inverts the G_BRCOND condition by XOR-ing it with the target's "true"
     // constant and swaps the two branch targets: the G_BRCOND now jumps to the
     // old G_BR target and the G_BR to the old G_BRCOND target.
1701 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1702 Builder.setInstrAndDebugLoc(*BrCond);
1703 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1704 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1705 // this to i1 only since we might not know for sure what kind of
1706 // compare generated the condition value.
1707 auto True = Builder.buildConstant(
1708 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1709 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1710
     // Read the old G_BRCOND target before either branch is modified.
1711 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1712 Observer.changingInstr(MI);
1713 MI.getOperand(0).setMBB(FallthroughBB);
1714 Observer.changedInstr(MI);
1715
1716 // Change the conditional branch to use the inverted condition and
1717 // new target block.
1718 Observer.changingInstr(*BrCond);
1719 BrCond->getOperand(0).setReg(Xor.getReg(0));
1720 BrCond->getOperand(1).setMBB(BrTarget);
1721 Observer.changedInstr(*BrCond);
1722}
1723
1726 unsigned MaxLen) const {
     // Binds the six fields of MatchInfo by reference and forwards them, with
     // MI, MRI and MaxLen, to canLowerMemCpyFamily; returns its result.
1727 auto &[Dst, Src, KnownLen, Alignment, DstAlignCanChange, MemOps] = MatchInfo;
1728 return canLowerMemCpyFamily(MI, MRI, MaxLen, Dst, Src, KnownLen, Alignment,
1729 DstAlignCanChange, MemOps);
1730}
1731
1733 MachineInstr &MI, MemCpyFamilyLoweringInfo &MatchInfo) const {
     // Lowers MI through LegalizerHelper::lowerMemCpyFamily using the fields of
     // MatchInfo, with a builder positioned at MI and a dummy observer. The
     // lowering is asserted to succeed.
1734 auto &[Dst, Src, KnownLen, Alignment, DstAlignCanChange, MemOps] = MatchInfo;
1735 MachineIRBuilder HelperBuilder(MI);
1736 GISelObserverWrapper DummyObserver;
1737 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1738 bool Changed = Helper.lowerMemCpyFamily(MI, Dst, Src, KnownLen, Alignment,
1739 DstAlignCanChange, MemOps) ==
     // NOTE(review): the right-hand side of this comparison (listing line 1740)
     // is not present in this listing.
1741 assert(Changed && "expected memcpy-family instruction to lower");
1742 (void)Changed;
1743}
1744
1746 unsigned MaxLen) const {
     // Attempts the lowering in one step through
     // LegalizerHelper::lowerMemCpyFamily(MI, MaxLen), with a builder
     // positioned at MI and a dummy observer.
1747 MachineIRBuilder HelperBuilder(MI);
1748 GISelObserverWrapper DummyObserver;
1749 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1750 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
     // NOTE(review): the right-hand side of this comparison (listing line 1751)
     // is not present in this listing.
1752}
1753
1755 const MachineRegisterInfo &MRI,
1756 const APFloat &Val) {
     // Constant-folds the floating-point unary operation selected by MI's
     // opcode, applied to Val, and returns the folded value. Opcodes not
     // listed in the switch are unreachable.
1757 APFloat Result(Val);
1758 switch (MI.getOpcode()) {
1759 default:
1760 llvm_unreachable("Unexpected opcode!");
1761 case TargetOpcode::G_FNEG: {
1762 Result.changeSign();
1763 return Result;
1764 }
1765 case TargetOpcode::G_FABS: {
1766 Result.clearSign();
1767 return Result;
1768 }
1769 case TargetOpcode::G_FCEIL:
1770 Result.roundToIntegral(APFloat::rmTowardPositive);
1771 return Result;
1772 case TargetOpcode::G_FFLOOR:
1773 Result.roundToIntegral(APFloat::rmTowardNegative);
1774 return Result;
1775 case TargetOpcode::G_INTRINSIC_TRUNC:
1776 Result.roundToIntegral(APFloat::rmTowardZero);
1777 return Result;
1778 case TargetOpcode::G_INTRINSIC_ROUND:
1779 Result.roundToIntegral(APFloat::rmNearestTiesToAway);
1780 return Result;
1781 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
1782 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1783 return Result;
1784 case TargetOpcode::G_FRINT:
1785 case TargetOpcode::G_FNEARBYINT:
1786 // Use default rounding mode (round to nearest, ties to even)
1787 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1788 return Result;
1789 case TargetOpcode::G_FPEXT:
1790 case TargetOpcode::G_FPTRUNC: {
1791 bool Unused;
1792 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
     // NOTE(review): the opening of this conversion call (listing line 1793) is
     // not present in this listing.
1794 &Unused);
1795 return Result;
1796 }
     // G_FSQRT and G_FLOG2 are evaluated in host double arithmetic; they break
     // out of the switch so the result is converted back to Val's semantics.
1797 case TargetOpcode::G_FSQRT: {
1798 bool Unused;
     // NOTE(review): the opening of this conversion call (listing line 1799) is
     // not present in this listing.
1800 &Unused);
1801 Result = APFloat(sqrt(Result.convertToDouble()));
1802 break;
1803 }
1804 case TargetOpcode::G_FLOG2: {
1805 bool Unused;
     // NOTE(review): the opening of this conversion call (listing line 1806) is
     // not present in this listing.
1807 &Unused);
1808 Result = APFloat(log2(Result.convertToDouble()));
1809 break;
1810 }
1811 }
1812 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1813 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and
1814 // `G_FLOG2` reach here.
1815 bool Unused;
1816 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1817 return Result;
1818}
1819
1821 MachineInstr &MI, const ConstantFP *Cst) const {
     // Folds MI applied to the constant Cst via constantFoldFpUnary, emits the
     // result as a floating-point constant into MI's destination operand, and
     // erases MI.
1822 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1823 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1824 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1825 MI.eraseFromParent();
1826}
1827
1829 PtrAddChain &MatchInfo) const {
     // On success MatchInfo carries the summed immediate (Imm), the innermost
     // base register (Base), the register bank of the inner offset (Bank) and
     // the flags for the merged G_PTR_ADD (Flags).
1830 // We're trying to match the following pattern:
1831 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1832 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1833 // -->
1834 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1835
1836 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1837 return false;
1838
1839 Register Add2 = MI.getOperand(1).getReg();
1840 Register Imm1 = MI.getOperand(2).getReg();
1841 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1842 if (!MaybeImmVal)
1843 return false;
1844
1845 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1846 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1847 return false;
1848
1849 Register Base = Add2Def->getOperand(1).getReg();
1850 Register Imm2 = Add2Def->getOperand(2).getReg();
1851 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1852 if (!MaybeImm2Val)
1853 return false;
1854
1855 // Check if the new combined immediate forms an illegal addressing mode.
1856 // Do not combine if it was legal before but would get illegal.
1857 // To do so, we need to find a load/store user of the pointer to get
1858 // the access type.
1859 Type *AccessTy = nullptr;
1860 auto &MF = *MI.getMF();
     // The first load/store user found supplies the access type.
1861 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1862 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1863 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1864 MF.getFunction().getContext());
1865 break;
1866 }
1867 }
     // NOTE(review): the declaration of AMNew (listing line 1868) is not
     // present in this listing.
1869 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1870 AMNew.BaseOffs = CombinedImm.getSExtValue();
1871 if (AccessTy) {
1872 AMNew.HasBaseReg = true;
     // NOTE(review): the declaration of AMOld (listing line 1873) is not
     // present in this listing.
1874 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1875 AMOld.HasBaseReg = true;
1876 unsigned AS = MRI.getType(Add2).getAddressSpace();
1877 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1878 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1879 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1880 return false;
1881 }
1882
1883 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1884 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1885 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1886 // largest signed integer that fits into the index type, which is the maximum
1887 // size of allocated objects according to the IR Language Reference.
1888 unsigned PtrAddFlags = MI.getFlags();
1889 unsigned LHSPtrAddFlags = Add2Def->getFlags();
1890 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1891 bool IsInBounds =
1892 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1893 unsigned Flags = 0;
     // NOTE(review): the statements controlled by the two conditions below
     // (listing lines 1895, 1897 and 1898) are not present in this listing.
1894 if (IsNoUWrap)
1896 if (IsInBounds) {
1899 }
1900
1901 // Pass the combined immediate to the apply function.
1902 MatchInfo.Imm = AMNew.BaseOffs;
1903 MatchInfo.Base = Base;
1904 MatchInfo.Bank = getRegBank(Imm2);
1905 MatchInfo.Flags = Flags;
1906 return true;
1907}
1908
1910 PtrAddChain &MatchInfo) const {
     // Rewrites MI in place: its base becomes MatchInfo.Base, its offset a
     // newly built constant holding MatchInfo.Imm (assigned MatchInfo.Bank),
     // and its flags MatchInfo.Flags.
1911 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1912 MachineIRBuilder MIB(MI);
1913 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1914 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1915 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1916 Observer.changingInstr(MI);
1917 MI.getOperand(1).setReg(MatchInfo.Base);
1918 MI.getOperand(2).setReg(NewOffset.getReg(0));
1919 MI.setFlags(MatchInfo.Flags);
1920 Observer.changedInstr(MI);
1921}
1922
// Matches a shift by a constant whose shifted operand is defined by another
// shift of the same opcode that also shifts by a constant. On success
// MatchInfo.Imm holds the sum of both shift amounts and MatchInfo.Reg holds the
// inner shift's source register.
1924 RegisterImmPair &MatchInfo) const {
1925 // We're trying to match the following pattern with any of
1926 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1927 // %t1 = SHIFT %base, G_CONSTANT imm1
1928 // %root = SHIFT %t1, G_CONSTANT imm2
1929 // -->
1930 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1931
1932 unsigned Opcode = MI.getOpcode();
1933 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1934 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1935 Opcode == TargetOpcode::G_USHLSAT) &&
1936 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1937
// The outer (root) shift amount must be a constant.
1938 Register Shl2 = MI.getOperand(1).getReg();
1939 Register Imm1 = MI.getOperand(2).getReg();
1940 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1941 if (!MaybeImmVal)
1942 return false;
1943
// The shifted value must come from a shift with the same opcode.
// NOTE(review): Shl2Def is dereferenced without a null check; presumably the
// shifted operand always has a unique def here - confirm.
1944 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1945 if (Shl2Def->getOpcode() != Opcode)
1946 return false;
1947
// The inner shift amount must be a constant as well.
1948 Register Base = Shl2Def->getOperand(1).getReg();
1949 Register Imm2 = Shl2Def->getOperand(2).getReg();
1950 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1951 if (!MaybeImm2Val)
1952 return false;
1953
1954 // Pass the combined immediate to the apply function.
1955 MatchInfo.Imm =
1956 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1957 MatchInfo.Reg = Base;
1958
1959 // There is no simple replacement for a saturating unsigned left shift that
1960 // exceeds the scalar size.
1961 if (Opcode == TargetOpcode::G_USHLSAT &&
1962 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1963 return false;
1964
1965 return true;
1966}
1967
// Applies the combined shift amount stored in MatchInfo.Imm to MI. If the
// amount reaches the scalar bit width, G_SHL/G_LSHR are replaced by a zero
// constant and the remaining opcodes are clamped to (bit width - 1); otherwise
// MI is rewritten in place to shift MatchInfo.Reg by the new constant.
1969 RegisterImmPair &MatchInfo) const {
1970 unsigned Opcode = MI.getOpcode();
1971 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1972 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1973 Opcode == TargetOpcode::G_USHLSAT) &&
1974 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1975
1976 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1977 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1978 auto Imm = MatchInfo.Imm;
1979
1980 if (Imm >= ScalarSizeInBits) {
1981 // Any logical shift that exceeds scalar size will produce zero.
1982 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
// Define MI's result as constant 0 and drop MI entirely.
1983 Builder.buildConstant(MI.getOperand(0), 0);
1984 MI.eraseFromParent();
1985 return;
1986 }
1987 // Arithmetic shift and saturating signed left shift have no effect beyond
1988 // scalar size.
1989 Imm = ScalarSizeInBits - 1;
1990 }
1991
// The new amount is built with the type of MI's existing shift-amount operand.
1992 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1993 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1994 Observer.changingInstr(MI);
1995 MI.getOperand(1).setReg(MatchInfo.Reg);
1996 MI.getOperand(2).setReg(NewImm);
1997 Observer.changedInstr(MI);
1998}
1999
// Matches a shift by a non-zero constant whose shifted operand is a single-use
// G_AND/G_OR/G_XOR that has, as one of its operands, a single-use shift of the
// same opcode by a constant. On success MatchInfo records the logic
// instruction, the inner shift, the other logic operand and the sum of both
// shift amounts.
2001 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
2002 // We're trying to match the following pattern with any of
2003 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
2004 // with any of G_AND/G_OR/G_XOR logic instructions.
2005 // %t1 = SHIFT %X, G_CONSTANT C0
2006 // %t2 = LOGIC %t1, %Y
2007 // %root = SHIFT %t2, G_CONSTANT C1
2008 // -->
2009 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
2010 // %t4 = SHIFT %Y, G_CONSTANT C1
2011 // %root = LOGIC %t3, %t4
2012 unsigned ShiftOpcode = MI.getOpcode();
2013 assert((ShiftOpcode == TargetOpcode::G_SHL ||
2014 ShiftOpcode == TargetOpcode::G_ASHR ||
2015 ShiftOpcode == TargetOpcode::G_LSHR ||
2016 ShiftOpcode == TargetOpcode::G_USHLSAT ||
2017 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
2018 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2019
2020 // Match a one-use bitwise logic op.
2021 Register LogicDest = MI.getOperand(1).getReg();
2022 if (!MRI.hasOneNonDBGUse(LogicDest))
2023 return false;
2024
// NOTE(review): LogicMI is dereferenced without a null check - confirm that
// a unique def always exists at this point.
2025 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
2026 unsigned LogicOpcode = LogicMI->getOpcode();
2027 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
2028 LogicOpcode != TargetOpcode::G_XOR)
2029 return false;
2030
2031 // Find a matching one-use shift by constant.
// A root shift amount of zero is rejected as well.
2032 const Register C1 = MI.getOperand(2).getReg();
2033 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
2034 if (!MaybeImmVal || MaybeImmVal->Value == 0)
2035 return false;
2036
2037 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
2038
// Helper: true if MI is a single-use shift with the root's opcode and a
// constant amount; the amount is returned through ShiftVal.
2039 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
2040 // Shift should match previous one and should be a one-use.
2041 if (MI->getOpcode() != ShiftOpcode ||
2042 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2043 return false;
2044
2045 // Must be a constant.
2046 auto MaybeImmVal =
2047 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
2048 if (!MaybeImmVal)
2049 return false;
2050
2051 ShiftVal = MaybeImmVal->Value.getSExtValue();
2052 return true;
2053 };
2054
2055 // Logic ops are commutative, so check each operand for a match.
2056 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
2057 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
2058 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
2059 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
// C0Val is only read after one of the matchFirstShift calls has set it.
2060 uint64_t C0Val;
2061
2062 if (matchFirstShift(LogicMIOp1, C0Val)) {
2063 MatchInfo.LogicNonShiftReg = LogicMIReg2;
2064 MatchInfo.Shift2 = LogicMIOp1;
2065 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
2066 MatchInfo.LogicNonShiftReg = LogicMIReg1;
2067 MatchInfo.Shift2 = LogicMIOp2;
2068 } else
2069 return false;
2070
2071 MatchInfo.ValSum = C0Val + C1Val;
2072
2073 // The fold is not valid if the sum of the shift values exceeds bitwidth.
2074 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
2075 return false;
2076
2077 MatchInfo.Logic = LogicMI;
2078 return true;
2079}
2080
// Emits the replacement for a matched shift-of-logic-of-shift: one shift of the
// inner shift's source by MatchInfo.ValSum, one shift of
// MatchInfo.LogicNonShiftReg by MI's own amount, and the logic op combining
// both into MI's destination. The inner shift, the old logic op and MI are
// erased.
2082 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
2083 unsigned Opcode = MI.getOpcode();
2084 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
2085 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
2086 Opcode == TargetOpcode::G_SSHLSAT) &&
2087 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2088
// ShlType is the type of the shift-amount operand, DestType of MI's result.
2089 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
2090 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
2091
2092 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
2093
2094 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
2095 Register Shift1 =
2096 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
2097
2098 // If LogicNonShiftReg is the same to Shift1Base, and shift1 const is the same
2099 // to MatchInfo.Shift2 const, CSEMIRBuilder will reuse the old shift1 when
2100 // build shift2. So, if we erase MatchInfo.Shift2 at the end, actually we
2101 // remove old shift1. And it will cause crash later. So erase it earlier to
2102 // avoid the crash.
2103 MatchInfo.Shift2->eraseFromParent();
2104
// The second new shift reuses MI's existing shift-amount register.
2105 Register Shift2Const = MI.getOperand(2).getReg();
2106 Register Shift2 = Builder
2107 .buildInstr(Opcode, {DestType},
2108 {MatchInfo.LogicNonShiftReg, Shift2Const})
2109 .getReg(0);
2110
2111 Register Dest = MI.getOperand(0).getReg();
2112 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2113
2114 // This was one use so it's safe to remove it.
2115 MatchInfo.Logic->eraseFromParent();
2116
2117 MI.eraseFromParent();
2118}
2119
// Matches a G_SHL of an add/or with a constant (or constant splat) operand by a
// constant (or constant splat) amount, when the target reports that commuting
// is desirable. On success MatchInfo is a builder callback that shifts both
// operands separately and recombines them with the original add/or opcode.
2121 BuildFnTy &MatchInfo) const {
2122 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2123 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2124 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2125 auto &Shl = cast<GenericMachineInstr>(MI);
2126 Register DstReg = Shl.getReg(0);
2127 Register SrcReg = Shl.getReg(1);
2128 Register ShiftReg = Shl.getReg(2);
2129 Register X, C1;
2130
2131 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2132 return false;
2133
// NOTE(review): the start of this pattern is not visible here; going by the
// assert further down, it accepts a G_ADD or a G_OR and binds X and C1.
2134 if (!mi_match(SrcReg, MRI,
2136 m_GOr(m_Reg(X), m_Reg(C1))))))
2137 return false;
2138
// Both the logic/add constant and the shift amount must be constants or splats.
2139 APInt C1Val, C2Val;
2140 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2141 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2142 return false;
2143
2144 auto *SrcDef = MRI.getVRegDef(SrcReg);
2145 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2146 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2147 LLT SrcTy = MRI.getType(SrcReg);
// The callback captures its inputs by value, including the SrcDef pointer.
2148 MatchInfo = [=](MachineIRBuilder &B) {
2149 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2150 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2151 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2152 };
2153 return true;
2154}
2155
// Matches a G_LSHR (MI) by a constant together with an inner shift (ShiftMI)
// by a constant, when the two amounts add up to less than the inner shift's
// scalar width. On success MatchInfo describes a single wide shift, plus an
// optional mask.
// NOTE(review): how ShiftMI relates to MI's operand 1 is established by the
// caller and is not visible in this block - confirm.
2157 LshrOfTruncOfLshr &MatchInfo,
2158 MachineInstr &ShiftMI) const {
2159 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2160
2161 Register N0 = MI.getOperand(1).getReg();
2162 Register N1 = MI.getOperand(2).getReg();
2163 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2164
// Both shift amounts must be constants or constant splats.
2165 APInt N1C, N001C;
2166 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2167 return false;
2168 auto N001 = ShiftMI.getOperand(2).getReg();
2169 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2170 return false;
2171
// Zero-extend the narrower amount so both APInts share one bit width.
2172 if (N001C.getBitWidth() > N1C.getBitWidth())
2173 N1C = N1C.zext(N001C.getBitWidth());
2174 else
2175 N001C = N001C.zext(N1C.getBitWidth());
2176
2177 Register InnerShift = ShiftMI.getOperand(0).getReg();
2178 LLT InnerShiftTy = MRI.getType(InnerShift);
2179 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2180 if ((N1C + N001C).ult(InnerShiftSize)) {
2181 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2182 MatchInfo.ShiftAmt = N1C + N001C;
2183 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2184 MatchInfo.InnerShiftTy = InnerShiftTy;
2185
// No mask is requested when the inner amount plus the outer operand width
// equals the inner shift's width.
2186 if ((N001C + OpSizeInBits) == InnerShiftSize)
2187 return true;
// Otherwise a mask is requested, but only if both intermediates are single-use.
2188 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2189 MatchInfo.Mask = true;
2190 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2191 return true;
2192 }
2193 }
2194 return false;
2195}
2196
// Builds the replacement for a matched G_LSHR: one G_LSHR of MatchInfo.Src by
// MatchInfo.ShiftAmt in MatchInfo.InnerShiftTy, optionally ANDed with a mask,
// then truncated into MI's destination. MI is erased afterwards.
2198 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
2199 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2200
2201 Register Dst = MI.getOperand(0).getReg();
2202 auto ShiftAmt =
2203 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2204 auto Shift =
2205 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2206 if (MatchInfo.Mask == true) {
// NOTE(review): the start of the MaskVal initializer is not visible here; it
// is derived from MatchInfo.MaskVal's zero-extended value.
2207 APInt MaskVal =
2209 MatchInfo.MaskVal.getZExtValue());
2210 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2211 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2212 Builder.buildTrunc(Dst, And);
2213 } else
2214 Builder.buildTrunc(Dst, Shift);
2215 MI.eraseFromParent();
2216}
2217
// Matches a G_MUL whose second source operand is a constant, storing that
// constant's exactLogBase2() in ShiftVal.
2219 unsigned &ShiftVal) const {
2220 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2221 auto MaybeImmVal =
2222 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2223 if (!MaybeImmVal)
2224 return false;
2225
// The match fails when the stored value reads back as -1; presumably that is
// what exactLogBase2() yields for a non-power-of-two - confirm.
2226 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2227 return (static_cast<int32_t>(ShiftVal) != -1);
2228}
2229
// Turns the G_MUL MI into a G_SHL in place: the opcode descriptor is switched
// and operand 2 is replaced by a new constant holding ShiftVal.
2231 unsigned &ShiftVal) const {
2232 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2233 MachineIRBuilder MIB(MI);
// The shift-amount constant is built with the type of MI's result.
2234 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2235 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2236 Observer.changingInstr(MI);
2237 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2238 MI.getOperand(2).setReg(ShiftCst.getReg(0));
// NOTE(review): the statement guarded by this condition (a shift by the top
// bit position) is not visible here.
2239 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2241 Observer.changedInstr(MI);
2242}
2243
// Matches a G_SUB whose right-hand side is read as an integer constant and
// prepares a callback that turns it, in place, into a G_ADD of the negated
// constant. Requires G_ADD on the result type to be legal (or to run before
// the legalizer).
2245 BuildFnTy &MatchInfo) const {
2246 GSub &Sub = cast<GSub>(MI);
2247
2248 LLT Ty = MRI.getType(Sub.getReg(0));
2249
2250 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2251 return false;
2252
// NOTE(review): the condition guarding this early return is not visible here.
2254 return false;
2255
2256 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2257
// Imm and Ty are captured by copy, MI by reference.
2258 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2259 auto NegCst = B.buildConstant(Ty, -Imm);
2260 Observer.changingInstr(MI);
2261 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2262 MI.getOperand(2).setReg(NegCst.getReg(0));
// NOTE(review): statements around this minimum-signed-value check are not
// visible here.
2264 if (Imm.isMinSignedValue())
2266 Observer.changedInstr(MI);
2267 };
2268 return true;
2269}
2270
2271// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
// Matches a G_SHL whose shifted operand is a G_ANYEXT/G_ZEXT/G_SEXT and whose
// amount is a constant or constant splat. MatchData receives the extension's
// source register and the shift amount. Returns true only if the source's
// known leading zero bits cover the shift amount and the amount is smaller
// than the source scalar width.
2273 RegisterImmPair &MatchData) const {
// VT must be non-null here; it is dereferenced below.
2274 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2275 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2276 return false;
2277
2278 Register LHS = MI.getOperand(1).getReg();
2279
2280 Register ExtSrc;
2281 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2282 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2283 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2284 return false;
2285
2286 Register RHS = MI.getOperand(2).getReg();
2287 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2288 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2289 if (!MaybeShiftAmtVal)
2290 return false;
2291
// The legality query is only made when legalizer info (LI) is available.
2292 if (LI) {
2293 LLT SrcTy = MRI.getType(ExtSrc);
2294
2295 // We only really care about the legality with the shifted value. We can
2296 // pick any type the constant shift amount, so ask the target what to
2297 // use. Otherwise we would have to guess and hope it is reported as legal.
2298 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2299 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2300 return false;
2301 }
2302
// MatchData is written before the final check, so it is also populated when
// this function returns false.
2303 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2304 MatchData.Reg = ExtSrc;
2305 MatchData.Imm = ShiftAmt;
2306
// Number of leading bits of ExtSrc that are known to be zero.
2307 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2308 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2309 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2310}
2311
// Replaces MI by a G_SHL performed in the type of MatchData.Reg (carrying MI's
// flags), followed by a G_ZEXT into MI's destination. MI is erased.
2313 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2314 Register ExtSrcReg = MatchData.Reg;
2315 int64_t ShiftAmtVal = MatchData.Imm;
2316
// The shift-amount constant uses the same type as the narrow source.
2317 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2318 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2319 auto NarrowShift =
2320 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2321 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2322 MI.eraseFromParent();
2323}
2324
// Matches a merge whose sources are exactly the results of one G_UNMERGE, in
// the same order. On success MatchInfo is the unmerge's source register.
2326 Register &MatchInfo) const {
// NOTE(review): the declaration of `Merge` used below is not visible here.
2328 SmallVector<Register, 16> MergedValues;
2329 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2330 MergedValues.emplace_back(Merge.getSourceReg(I));
2331
// The first source must be defined by an unmerge with as many defs as the
// merge has sources.
2332 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2333 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2334 return false;
2335
// Every merge source must be the unmerge result at the same index.
2336 for (unsigned I = 0; I < MergedValues.size(); ++I)
2337 if (MergedValues[I] != Unmerge->getReg(I))
2338 return false;
2339
2340 MatchInfo = Unmerge->getSourceReg();
2341 return true;
2342}
2343
// Returns the register reached from Reg by repeatedly stepping through
// bitcasts; Reg is returned unchanged when it is not matched as a bitcast.
2345 const MachineRegisterInfo &MRI) {
// Each successful match rebinds Reg to the bitcast's source; the loop body is
// intentionally empty.
2346 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2347 ;
2348
2349 return Reg;
2350}
2351
// Matches a G_UNMERGE_VALUES whose source (looking through bitcasts) is a
// merge-like instruction whose first source has the same type, or at least the
// same bit size, as the unmerge's first result. On success all sources of the
// merge-like instruction are appended to Operands.
2353 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2354 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2355 "Expected an unmerge");
2356 auto &Unmerge = cast<GUnmerge>(MI);
2357 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2358
2359 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2360 if (!SrcInstr)
2361 return false;
2362
2363 // Check the source type of the merge.
2364 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2365 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2366 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2367 if (SrcMergeTy != Dst0Ty && !SameSize)
2368 return false;
2369 // They are the same now (modulo a bitcast).
2370 // We can collect all the src registers.
2371 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2372 Operands.push_back(SrcInstr->getSourceReg(Idx));
2373 return true;
2374}
2375
// Replaces each result of the G_UNMERGE_VALUES MI with the corresponding entry
// of Operands, either directly (same type) or through a cast, then erases MI.
2377 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2378 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2379 "Expected an unmerge");
2380 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2381 "Not enough operands to replace all defs");
// All operands except the last one are results.
2382 unsigned NumElems = MI.getNumOperands() - 1;
2383
// The reuse decision is made once, from the first operand and first result.
2384 LLT SrcTy = MRI.getType(Operands[0]);
2385 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2386 bool CanReuseInputDirectly = DstTy == SrcTy;
2387 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2388 Register DstReg = MI.getOperand(Idx).getReg();
2389 Register SrcReg = Operands[Idx];
2390
2391 // This combine may run after RegBankSelect, so we need to be aware of
2392 // register banks.
// If the destination has a class/bank that differs from the source's, go
// through a copy that is given the destination's class/bank.
2393 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2394 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2395 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2396 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2397 }
2398
2399 if (CanReuseInputDirectly)
2400 replaceRegWith(MRI, DstReg, SrcReg);
2401 else
2402 Builder.buildCast(DstReg, SrcReg);
2403 }
2404 MI.eraseFromParent();
2405}
2406
// Matches an instruction whose last operand is defined by a G_CONSTANT or
// G_FCONSTANT and splits that constant's bits into one APInt per preceding
// operand, appended to Csts starting with the least significant piece.
2408 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
// The source is the last operand; every operand before it is a result.
2409 unsigned SrcIdx = MI.getNumOperands() - 1;
2410 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2411 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2412 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2413 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2414 return false;
2415 // Break down the big constant in smaller ones.
// Floating-point constants are handled through their bit pattern.
2416 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2417 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2418 ? CstVal.getCImm()->getValue()
2419 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2420
// Each piece is as wide as the first result's type.
2421 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2422 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2423 // Unmerge a constant.
2424 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2425 Csts.emplace_back(Val.trunc(ShiftAmt));
2426 Val = Val.lshr(ShiftAmt);
2427 }
2428
2429 return true;
2430}
2431
// Defines each result of the G_UNMERGE_VALUES MI as the constant at the same
// index in Csts, then erases MI.
2433 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2434 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2435 "Expected an unmerge");
2436 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2437 "Not enough operands to replace all defs");
2438 unsigned NumElems = MI.getNumOperands() - 1;
2439 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2440 Register DstReg = MI.getOperand(Idx).getReg();
2441 Builder.buildConstant(DstReg, Csts[Idx]);
2442 }
2443
2444 MI.eraseFromParent();
2445}
2446
// Returns true when MI's last operand is defined by a GImplicitDef. MatchInfo
// is set to a callback that builds an undef for every operand of MI except the
// last one.
2449 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2450 unsigned SrcIdx = MI.getNumOperands() - 1;
2451 Register SrcReg = MI.getOperand(SrcIdx).getReg();
// MatchInfo is assigned unconditionally, before the match result is known;
// the callback holds MI by reference.
2452 MatchInfo = [&MI](MachineIRBuilder &B) {
2453 unsigned NumElems = MI.getNumOperands() - 1;
2454 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2455 Register DstReg = MI.getOperand(Idx).getReg();
2456 B.buildUndef(DstReg);
2457 }
2458 };
2459 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2460}
2461
// Matches a G_UNMERGE_VALUES with non-vector first result and non-vector
// source in which every result except the first has no non-debug uses.
2463 MachineInstr &MI) const {
2464 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2465 "Expected an unmerge");
// Operand getNumDefs() is the first operand after the results, i.e. the source.
2466 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2467 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2468 return false;
2469 // Check that all the lanes are dead except the first one.
2470 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2471 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2472 return false;
2473 }
2474 return true;
2475}
2476
// Replaces MI by a G_TRUNC of its source (the operand following the results)
// into MI's first result, then erases MI.
2478 MachineInstr &MI) const {
2479 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2480 Register Dst0Reg = MI.getOperand(0).getReg();
2481 Builder.buildTrunc(Dst0Reg, SrcReg);
2482 MI.eraseFromParent();
2483}
2484
// Matches a scalar G_UNMERGE_VALUES whose scalar source is a G_ZEXT of a value
// no wider than the unmerge's first result.
2486 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2487 "Expected an unmerge");
2488 Register Dst0Reg = MI.getOperand(0).getReg();
2489 LLT Dst0Ty = MRI.getType(Dst0Reg);
2490 // G_ZEXT on vector applies to each lane, so it will
2491 // affect all destinations. Therefore we won't be able
2492 // to simplify the unmerge to just the first definition.
2493 if (Dst0Ty.isVector())
2494 return false;
// The source is the operand that follows the results.
2495 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2496 LLT SrcTy = MRI.getType(SrcReg);
2497 if (SrcTy.isVector())
2498 return false;
2499
2500 Register ZExtSrcReg;
2501 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2502 return false;
2503
2504 // Finally we can replace the first definition with
2505 // a zext of the source if the definition is big enough to hold
2506 // all of ZExtSrc bits.
2507 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2508 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2509}
2510
// Rewrites a G_UNMERGE_VALUES of a G_ZEXT: the first result becomes a zext of
// (or is replaced by) the G_ZEXT's source, and every other result is replaced
// by a shared zero constant. MI is erased.
2512 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2513 "Expected an unmerge");
2514
2515 Register Dst0Reg = MI.getOperand(0).getReg();
2516
2517 MachineInstr *ZExtInstr =
2518 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2519 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2520 "Expecting a G_ZEXT");
2521
2522 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2523 LLT Dst0Ty = MRI.getType(Dst0Reg);
2524 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2525
// Wider first result: zero-extend into it. Equal size: reuse the register.
2526 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2527 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2528 } else {
2529 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2530 "ZExt src doesn't fit in destination");
2531 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2532 }
2533
// The zero constant is created lazily, only if there is a second result, and
// is typed like the first result.
2534 Register ZeroReg;
2535 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2536 if (!ZeroReg)
2537 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2538 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2539 }
2540 MI.eraseFromParent();
2541}
2542
// Matches a scalar G_SHL/G_LSHR/G_ASHR wider than TargetShiftSize whose amount
// is a constant in [Size / 2, Size). The amount is returned through ShiftVal.
2544 unsigned TargetShiftSize,
2545 unsigned &ShiftVal) const {
2546 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2547 MI.getOpcode() == TargetOpcode::G_LSHR ||
2548 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2549
2550 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2551 if (Ty.isVector()) // TODO:
2552 return false;
2553
2554 // Don't narrow further than the requested size.
2555 unsigned Size = Ty.getSizeInBits();
2556 if (Size <= TargetShiftSize)
2557 return false;
2558
2559 auto MaybeImmVal =
2560 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2561 if (!MaybeImmVal)
2562 return false;
2563
// ShiftVal is written even when the range check below fails.
2564 ShiftVal = MaybeImmVal->Value.getSExtValue();
2565 return ShiftVal >= Size / 2 && ShiftVal < Size;
2566}
2567
// Replaces a wide shift by a constant of at least half the bit width with an
// unmerge into two halves, at most one narrow shift per produced half, and a
// merge of the halves into MI's destination. MI is erased.
2569 MachineInstr &MI, const unsigned &ShiftVal) const {
2570 Register DstReg = MI.getOperand(0).getReg();
2571 Register SrcReg = MI.getOperand(1).getReg();
2572 LLT Ty = MRI.getType(SrcReg);
2573 unsigned Size = Ty.getSizeInBits();
2574 unsigned HalfSize = Size / 2;
2575 assert(ShiftVal >= HalfSize);
2576
2577 LLT HalfTy = LLT::scalar(HalfSize);
2578
// Unmerge result 0 is the half labelled lo below, result 1 the half labelled hi.
2579 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2580 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2581
2582 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2583 Register Narrowed = Unmerge.getReg(1);
2584
2585 // dst = G_LSHR s64:x, C for C >= 32
2586 // =>
2587 // lo, hi = G_UNMERGE_VALUES x
2588 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2589
// A residual amount of zero needs no narrow shift at all.
2590 if (NarrowShiftAmt != 0) {
2591 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2592 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2593 }
2594
2595 auto Zero = Builder.buildConstant(HalfTy, 0);
2596 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2597 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2598 Register Narrowed = Unmerge.getReg(0);
2599 // dst = G_SHL s64:x, C for C >= 32
2600 // =>
2601 // lo, hi = G_UNMERGE_VALUES x
2602 // dst = G_MERGE_VALUES 0, (G_SHL lo, C - 32)
2603 if (NarrowShiftAmt != 0) {
2604 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2605 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2606 }
2607
2608 auto Zero = Builder.buildConstant(HalfTy, 0);
2609 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2610 } else {
2611 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
// Hi is the hi half shifted arithmetically by (HalfSize - 1).
2612 auto Hi = Builder.buildAShr(
2613 HalfTy, Unmerge.getReg(1),
2614 Builder.buildConstant(HalfTy, HalfSize - 1));
2615
2616 if (ShiftVal == HalfSize) {
2617 // (G_ASHR i64:x, 32) ->
2618 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2619 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2620 } else if (ShiftVal == Size - 1) {
2621 // Don't need a second shift.
2622 // (G_ASHR i64:x, 63) ->
2623 // %narrowed = (G_ASHR hi_32(x), 31)
2624 // G_MERGE_VALUES %narrowed, %narrowed
2625 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2626 } else {
2627 auto Lo = Builder.buildAShr(
2628 HalfTy, Unmerge.getReg(1),
2629 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2630
2631 // (G_ASHR i64:x, C) ->, for C >= 32
2632 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2633 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2634 }
2635 }
2636
2637 MI.eraseFromParent();
2638}
2639
// Convenience wrapper: runs matchCombineShiftToUnmerge on MI with
// TargetShiftAmount and, if it matches, applies applyCombineShiftToUnmerge.
// Returns whether the combine was applied.
2641 MachineInstr &MI, unsigned TargetShiftAmount) const {
2642 unsigned ShiftAmt;
2643 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2644 applyCombineShiftToUnmerge(MI, ShiftAmt);
2645 return true;
2646 }
2647
2648 return false;
2649}
2650
// Matches a G_INTTOPTR whose source is a G_PTRTOINT of a register that has the
// same type as the G_INTTOPTR's result; that register is bound to Reg.
2652 Register &Reg) const {
2653 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2654 Register DstReg = MI.getOperand(0).getReg();
2655 LLT DstTy = MRI.getType(DstReg);
2656 Register SrcReg = MI.getOperand(1).getReg();
2657 return mi_match(SrcReg, MRI,
2658 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2659}
2660
// Replaces the G_INTTOPTR MI with a copy of Reg into MI's destination and
// erases MI.
2662 Register &Reg) const {
2663 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2664 Register DstReg = MI.getOperand(0).getReg();
2665 Builder.buildCopy(DstReg, Reg);
2666 MI.eraseFromParent();
2667}
2668
// Replaces the G_PTRTOINT MI with a zero-extend-or-truncate of Reg into MI's
// destination and erases MI.
2670 Register &Reg) const {
2671 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2672 Register DstReg = MI.getOperand(0).getReg();
2673 Builder.buildZExtOrTrunc(DstReg, Reg);
2674 MI.eraseFromParent();
2675}
2676
// Matches a G_ADD in which one operand is a G_PTRTOINT of a pointer whose
// scalar width equals the integer's. PtrReg.first receives the pointer
// register; PtrReg.second is true when the pointer was found in the second
// operand.
2678 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2679 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2680 Register LHS = MI.getOperand(1).getReg();
2681 Register RHS = MI.getOperand(2).getReg();
2682 LLT IntTy = MRI.getType(LHS);
2683
2684 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2685 // instruction.
2686 PtrReg.second = false;
2687 for (Register SrcReg : {LHS, RHS}) {
2688 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2689 // Don't handle cases where the integer is implicitly converted to the
2690 // pointer width.
2691 LLT PtrTy = MRI.getType(PtrReg.first);
2692 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2693 return true;
2694 }
2695
// Reached after the LHS attempt failed: flag the RHS attempt as commuted.
2696 PtrReg.second = true;
2697 }
2698
2699 return false;
2700}
2701
// Replaces MI by a G_PTR_ADD of PtrReg.first and the other operand, followed
// by a G_PTRTOINT into MI's destination. MI is erased.
2703 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2704 Register Dst = MI.getOperand(0).getReg();
2705 Register LHS = MI.getOperand(1).getReg();
2706 Register RHS = MI.getOperand(2).getReg();
2707
// When the pointer came from the second operand, swap so that RHS holds the
// integer offset; LHS is then overwritten with the pointer itself.
2708 const bool DoCommute = PtrReg.second;
2709 if (DoCommute)
2710 std::swap(LHS, RHS);
2711 LHS = PtrReg.first;
2712
2713 LLT PtrTy = MRI.getType(LHS);
2714
2715 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2716 Builder.buildPtrToInt(Dst, PtrAdd);
2717 MI.eraseFromParent();
2718}
2719
// Matches a pointer add whose base is a G_INTTOPTR of a constant and whose
// offset is a constant. NewCst receives base + offset, computed at the bit
// width of the result type.
2721 APInt &NewCst) const {
2722 auto &PtrAdd = cast<GPtrAdd>(MI);
2723 Register LHS = PtrAdd.getBaseReg();
2724 Register RHS = PtrAdd.getOffsetReg();
// Local reference obtained from the builder's function; it is the MRI used
// for every query in this block.
2725 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2726
2727 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2728 APInt Cst;
2729 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2730 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2731 // G_INTTOPTR uses zero-extension
// The base is zero-extended/truncated, the offset sign-extended/truncated.
2732 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2733 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2734 return true;
2735 }
2736 }
2737
2738 return false;
2739}
2740
// Defines the pointer add's result as the constant NewCst and erases the
// pointer add.
2742 APInt &NewCst) const {
2743 auto &PtrAdd = cast<GPtrAdd>(MI);
2744 Register Dst = PtrAdd.getReg(0);
2745
2746 Builder.buildConstant(Dst, NewCst);
2747 PtrAdd.eraseFromParent();
2748}
2749
// Matches a G_ANYEXT whose source (looking through copies) is a G_TRUNC of a
// register with the G_ANYEXT's result type, provided that register can replace
// the result. The truncated register is bound to Reg.
2751 Register &Reg) const {
2752 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2753 Register DstReg = MI.getOperand(0).getReg();
2754 Register SrcReg = MI.getOperand(1).getReg();
// Use the copy-stripped source only when the lookup produced a valid register.
2755 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2756 if (OriginalSrcReg.isValid())
2757 SrcReg = OriginalSrcReg;
2758 LLT DstTy = MRI.getType(DstReg);
2759 return mi_match(SrcReg, MRI,
2760 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2761 canReplaceReg(DstReg, Reg, MRI);
2762}
2763
// Matches a G_ZEXT of a G_TRUNC of a register (bound to Reg) that has the
// G_ZEXT's result type and can replace the result, when the bits removed by
// the truncate are known to be zero in Reg.
2765 Register &Reg) const {
2766 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2767 Register DstReg = MI.getOperand(0).getReg();
2768 Register SrcReg = MI.getOperand(1).getReg();
2769 LLT DstTy = MRI.getType(DstReg);
2770 if (mi_match(SrcReg, MRI,
2771 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2772 canReplaceReg(DstReg, Reg, MRI)) {
// DstSize - SrcSize is the number of high bits dropped by the truncate.
2773 unsigned DstSize = DstTy.getScalarSizeInBits();
2774 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2775 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2776 }
2777 return false;
2778}
2779
// Picks the intermediate type for narrowing a right shift that feeds a
// truncate: ShiftTy with 32-bit elements when the shift is wider than 32 bits
// and the truncate result is narrower than 32 bits, otherwise ShiftTy itself.
2781 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2782 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2783
2784 // ShiftTy > 32 > TruncTy -> 32
2785 if (ShiftSize > 32 && TruncSize < 32)
2786 return ShiftTy.changeElementSize(32);
2787
2788 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2789 // Some targets like it, some don't, some only like it under certain
2790 // conditions/processor versions, etc.
2791 // A TL hook might be needed for this.
2792
2793 // Don't combine
2794 return ShiftTy;
2795}
2796
// Matches a G_TRUNC of a single-use shift (looking through copies) that can be
// performed in a narrower type. On success MatchInfo holds the shift
// instruction and the narrower type to use for it.
2798 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2799 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2800 Register DstReg = MI.getOperand(0).getReg();
2801 Register SrcReg = MI.getOperand(1).getReg();
2802
2803 if (!MRI.hasOneNonDBGUse(SrcReg))
2804 return false;
2805
2806 LLT SrcTy = MRI.getType(SrcReg);
2807 LLT DstTy = MRI.getType(DstReg);
2808
2809 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2810 const auto &TL = getTargetLowering();
2811
2812 LLT NewShiftTy;
2813 switch (SrcMI->getOpcode()) {
2814 default:
2815 return false;
2816 case TargetOpcode::G_SHL: {
// Left shifts are narrowed straight to the truncate's result type.
2817 NewShiftTy = DstTy;
2818
2819 // Make sure new shift amount is legal.
2820 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2821 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2822 return false;
2823 break;
2824 }
2825 case TargetOpcode::G_LSHR:
2826 case TargetOpcode::G_ASHR: {
2827 // For right shifts, we conservatively do not do the transform if the TRUNC
2828 // has any STORE users. The reason is that if we change the type of the
2829 // shift, we may break the truncstore combine.
2830 //
2831 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2832 for (auto &User : MRI.use_instructions(DstReg))
2833 if (User.getOpcode() == TargetOpcode::G_STORE)
2834 return false;
2835
// Getting SrcTy back from the helper means no narrower type was chosen.
2836 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2837 if (NewShiftTy == SrcTy)
2838 return false;
2839
2840 // Make sure we won't lose information by truncating the high bits.
2841 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2842 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2843 DstTy.getScalarSizeInBits()))
2844 return false;
2845 break;
2846 }
2847 }
2848
// NOTE(review): the start of this condition is not visible here; its visible
// arguments describe the shift opcode on NewShiftTy with the target's
// preferred shift-amount type.
2850 {SrcMI->getOpcode(),
2851 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2852 return false;
2853
2854 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2855 return true;
2856}
2857
2859 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
  // Rebuilds the shift recorded in MatchInfo in the narrower type
  // MatchInfo.second: the shift source is truncated to that type, the shift is
  // re-emitted with the original shift amount register, and MI is erased.
  // NOTE(review): the listing line that names this function is not visible.
2860 MachineInstr *ShiftMI = MatchInfo.first;
2861 LLT NewShiftTy = MatchInfo.second;
2862
2863 Register Dst = MI.getOperand(0).getReg();
2864 LLT DstTy = MRI.getType(Dst);
2865
2866 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2867 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2868 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2869
2870 Register NewShift =
2871 Builder
2872 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2873 .getReg(0);
2874
  // If the new shift already has the destination type, rewrite Dst to it;
  // otherwise a second truncate down to Dst is needed.
2875 if (NewShiftTy == DstTy)
2876 replaceRegWith(MRI, Dst, NewShift);
2877 else
2878 Builder.buildTrunc(Dst, NewShift);
2879
2880 eraseInst(MI);
2881}
2882
// Returns true if at least one explicit use operand of MI is a register for
// which getOpcodeDef finds a G_IMPLICIT_DEF.
// NOTE(review): the signature line of this function is missing from the listing.
2884 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2885 return MO.isReg() &&
2886 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2887 });
2888}
2889
// Returns true if every explicit use operand of MI is either not a register
// or a register for which getOpcodeDef finds a G_IMPLICIT_DEF. Non-register
// operands therefore never block the match.
// NOTE(review): the signature line of this function is missing from the listing.
2891 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2892 return !MO.isReg() ||
2893 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2894 });
2895}
2896
// Returns true if every element of the G_SHUFFLE_VECTOR mask (operand 3) is
// negative.
// NOTE(review): negative mask entries presumably denote undef lanes — confirm.
// NOTE(review): the signature line of this function is missing from the listing.
2898 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2899 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2900 return all_of(Mask, [](int Elt) { return Elt < 0; });
2901}
2902
// Returns true if operand 0 of the G_STORE is a register for which
// getOpcodeDef finds a G_IMPLICIT_DEF.
// NOTE(review): the signature line of this function is missing from the listing.
2904 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2905 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2906 MRI);
2907}
2908
// Returns true if operand 1 of the G_SELECT is a register for which
// getOpcodeDef finds a G_IMPLICIT_DEF.
// NOTE(review): the signature line of this function is missing from the listing.
2910 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2911 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2912 MRI);
2913}
2914
2916 MachineInstr &MI) const {
  // Returns true if MI is a G_INSERT_VECTOR_ELT / G_EXTRACT_VECTOR_ELT on a
  // non-scalable vector whose index is a known constant that is not smaller
  // than the vector's element count. Returns false for scalable vectors and
  // for non-constant indices.
  // NOTE(review): the listing line that names this function is not visible.
2917 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2918 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2919 "Expected an insert/extract element op");
2920 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2921 if (VecTy.isScalableVector())
2922 return false;
2923
  // The index is operand 2 for an extract and operand 3 for an insert.
2924 unsigned IdxIdx =
2925 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2926 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2927 if (!Idx)
2928 return false;
2929 return Idx->getZExtValue() >= VecTy.getNumElements();
2930}
2931
2933 unsigned &OpIdx) const {
  // Matches a G_SELECT whose condition is defined by a constant or constant
  // splat vector. On success OpIdx is set to 3 when that constant is zero and
  // to 2 otherwise.
  // NOTE(review): the listing line that names this function is not visible.
2934 GSelect &SelMI = cast<GSelect>(MI);
2935 auto Cst =
2936 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2937 if (!Cst)
2938 return false;
2939 OpIdx = Cst->isZero() ? 3 : 2;
2940 return true;
2941}
2942
2943void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2944
2946 const MachineOperand &MOP2) const {
  // Decides whether the two register operands MOP1 and MOP2 are known to hold
  // the same value, looking at their defining instructions (found via
  // getDefSrcRegIgnoringCopies). Returns false whenever equality cannot be
  // established.
  // NOTE(review): the listing line that names this function is not visible.
2947 if (!MOP1.isReg() || !MOP2.isReg())
2948 return false;
2949 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2950 if (!InstAndDef1)
2951 return false;
2952 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2953 if (!InstAndDef2)
2954 return false;
2955 MachineInstr *I1 = InstAndDef1->MI;
2956 MachineInstr *I2 = InstAndDef2->MI;
2957
2958 // Handle a case like this:
2959 //
2960 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2961 //
2962 // Even though %0 and %1 are produced by the same instruction they are not
2963 // the same values.
2964 if (I1 == I2)
2965 return MOP1.getReg() == MOP2.getReg();
2966
2967 // If we have an instruction which loads or stores, we can't guarantee that
2968 // it is identical.
2969 //
2970 // For example, we may have
2971 //
2972 // %x1 = G_LOAD %addr (load N from @somewhere)
2973 // ...
2974 // call @foo
2975 // ...
2976 // %x2 = G_LOAD %addr (load N from @somewhere)
2977 // ...
2978 // %or = G_OR %x1, %x2
2979 //
2980 // It's possible that @foo will modify whatever lives at the address we're
2981 // loading from. To be safe, let's just assume that all loads and stores
2982 // are different (unless we have something which is guaranteed to not
2983 // change.)
2984 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2985 return false;
2986
2987 // If both instructions are loads or stores, they are equal only if both
2988 // are dereferenceable invariant loads with the same number of bits.
2989 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
  // NOTE(review): listing lines 2990-2991, which define LS1 and LS2, are
  // missing from this view; they presumably cast I1 and I2 to a load/store
  // wrapper type — confirm against the upstream file.
2992 if (!LS1 || !LS2)
2993 return false;
2994
2995 if (!I2->isDereferenceableInvariantLoad() ||
2996 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2997 return false;
2998 }
2999
3000 // Check for physical registers on the instructions first to avoid cases
3001 // like this:
3002 //
3003 // %a = COPY $physreg
3004 // ...
3005 // SOMETHING implicit-def $physreg
3006 // ...
3007 // %b = COPY $physreg
3008 //
3009 // These copies are not equivalent.
3010 if (any_of(I1->uses(), [](const MachineOperand &MO) {
3011 return MO.isReg() && MO.getReg().isPhysical();
3012 })) {
3013 // Check if we have a case like this:
3014 //
3015 // %a = COPY $physreg
3016 // %b = COPY %a
3017 //
3018 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
3019 // From that, we know that they must have the same value, since they must
3020 // have come from the same COPY.
3021 return I1->isIdenticalTo(*I2);
3022 }
3023
3024 // We don't have any physical registers, so we don't necessarily need the
3025 // same vreg defs.
3026 //
3027 // On the off-chance that there's some target instruction feeding into the
3028 // instruction, let's use produceSameValue instead of isIdenticalTo.
3029 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
3030 // Handle instructions with multiple defs that produce same values. Values
3031 // are same for operands with same index.
3032 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
3033 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
3034 // I1 and I2 are different instructions but produce same values,
3035 // %1 and %6 are same, %1 and %7 are not the same value.
3036 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
3037 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
3038 }
3039 return false;
3040}
3041
3043 int64_t C) const {
  // Returns true if MOP is a register defined by a constant (or constant
  // splat vector) that is at most 64 bits wide and whose sign-extended value
  // equals C.
  // NOTE(review): the listing line that names this function is not visible.
3044 if (!MOP.isReg())
3045 return false;
3046 auto *MI = MRI.getVRegDef(MOP.getReg());
3047 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
  // The width check comes first so getSExtValue is only called on values
  // that fit into 64 bits.
3048 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
3049 MaybeCst->getSExtValue() == C;
3050}
3051
3053 double C) const {
  // Returns true if MOP is a register matched by m_GFCstOrSplat whose
  // floating-point value is exactly C (as decided by isExactlyValue).
  // NOTE(review): the listing line that names this function is not visible.
3054 if (!MOP.isReg())
3055 return false;
3056 std::optional<FPValueAndVReg> MaybeCst;
3057 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
3058 return false;
3059
3060 return MaybeCst->Value.isExactlyValue(C);
3061}
3062
3064 unsigned OpIdx) const {
  // Replaces the register defined by MI (operand 0) with the register in
  // operand OpIdx of the same instruction, then erases MI. Asserts that MI has
  // exactly one explicit def and that canReplaceReg allows the replacement.
  // NOTE(review): the listing line that names this function is not visible.
3065 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3066 Register OldReg = MI.getOperand(0).getReg();
3067 Register Replacement = MI.getOperand(OpIdx).getReg();
3068 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3069 replaceRegWith(MRI, OldReg, Replacement);
3070 MI.eraseFromParent();
3071}
3072
3074 Register Replacement) const {
  // Replaces the register defined by MI (operand 0) with Replacement, then
  // erases MI. Asserts that MI has exactly one explicit def and that
  // canReplaceReg allows the replacement.
  // NOTE(review): the listing line that names this function is not visible.
3075 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
3076 Register OldReg = MI.getOperand(0).getReg();
3077 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3078 replaceRegWith(MRI, OldReg, Replacement);
3079 MI.eraseFromParent();
3080}
3081
3083 unsigned ConstIdx) const {
  // Returns true if operand ConstIdx of MI is a known integer constant whose
  // unsigned value is greater than or equal to the size in bits of MI's
  // result type. Returns false if the operand is not a known constant.
  // NOTE(review): the listing line that names this function is not visible.
3084 Register ConstReg = MI.getOperand(ConstIdx).getReg();
3085 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3086
3087 // Get the shift amount
3088 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3089 if (!VRegAndVal)
3090 return false;
3091
3092 // Return true if shift amount >= Bitwidth
3093 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
3094}
3095
// Rewrites a G_FSHL / G_FSHR whose shift amount (operand 3) is a constant by
// reducing that constant modulo the scalar bit width of the result, emitting
// a new funnel shift with the reduced amount and erasing MI.
// NOTE(review): the signature line of this function is missing from the listing.
3097 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
3098 MI.getOpcode() == TargetOpcode::G_FSHR) &&
3099 "This is not a funnel shift operation");
3100
3101 Register ConstReg = MI.getOperand(3).getReg();
3102 LLT ConstTy = MRI.getType(ConstReg);
3103 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3104
  // The caller must already have established that the amount is constant;
  // this is only asserted here, not checked in release builds.
3105 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3106 assert((VRegAndVal) && "Value is not a constant");
3107
3108 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
3109 APInt NewConst = VRegAndVal->Value.urem(
3110 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3111
3112 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3113 Builder.buildInstr(
3114 MI.getOpcode(), {MI.getOperand(0)},
3115 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3116
3117 MI.eraseFromParent();
3118}
3119
// Returns true if both value operands (2 and 3) of the G_SELECT are equal
// according to matchEqualDefs and the result register can be replaced by
// operand 2.
// NOTE(review): the signature line of this function is missing from the listing.
3121 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3122 // Match (cond ? x : x)
3123 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3124 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3125 MRI);
3126}
3127
// Returns true if operands 1 and 2 of MI are equal according to
// matchEqualDefs and the result register can be replaced by operand 1.
// NOTE(review): the signature line of this function is missing from the listing.
3129 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3130 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3131 MRI);
3132}
3133
3135 unsigned OpIdx) const {
  // Returns true if operand OpIdx of MI is a register for which getOpcodeDef
  // finds a G_IMPLICIT_DEF.
  // NOTE(review): the listing line that names this function is not visible.
3136 MachineOperand &MO = MI.getOperand(OpIdx);
3137 return MO.isReg() &&
3138 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3139}
3140
3142 const MachineOperand &MO, bool OrNegative) const {
  // Forwards to isKnownToBeAPowerOfTwo for MO's register, passing OrNegative
  // through unchanged.
  // NOTE(review): the listing line that names this function is not visible.
3143 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT, OrNegative);
3144}
3145
3147 double C) const {
  // Builds a floating-point constant C into MI's result operand and erases
  // MI. Asserts that MI has exactly one def.
  // NOTE(review): the listing line that names this function is not visible.
3148 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3149 Builder.buildFConstant(MI.getOperand(0), C);
3150 MI.eraseFromParent();
3151}
3152
3154 int64_t C) const {
  // Builds an integer constant C into MI's result operand and erases MI.
  // Asserts that MI has exactly one def.
  // NOTE(review): the listing line that names this function is not visible.
3155 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3156 Builder.buildConstant(MI.getOperand(0), C);
3157 MI.eraseFromParent();
3158}
3159
// Builds the constant C into MI's result operand and erases MI. Asserts that
// MI has exactly one def.
// NOTE(review): the signature line (and therefore the type of C) is missing
// from the listing.
3161 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3162 Builder.buildConstant(MI.getOperand(0), C);
3163 MI.eraseFromParent();
3164}
3165
3167 ConstantFP *CFP) const {
  // Builds a floating-point constant with CFP's APFloat value into MI's
  // result operand and erases MI. Asserts that MI has exactly one def.
  // NOTE(review): the listing line that names this function is not visible.
3168 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3169 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3170 MI.eraseFromParent();
3171}
3172
// Builds an undef value into MI's result operand and erases MI. Asserts that
// MI has exactly one def.
// NOTE(review): the signature line of this function is missing from the listing.
3174 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3175 Builder.buildUndef(MI.getOperand(0));
3176 MI.eraseFromParent();
3177}
3178
3180 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
  // Looks for a negated operand among MI's operands 1 and 2. On success
  // MatchInfo holds {the non-negated operand, the register that was negated},
  // i.e. the LHS and RHS of the equivalent subtraction.
  // NOTE(review): the listing line that names this function is not visible.
3181 Register LHS = MI.getOperand(1).getReg();
3182 Register RHS = MI.getOperand(2).getReg();
  // NewLHS / NewRHS alias the tuple slots, so the lambda writes MatchInfo
  // directly.
3183 Register &NewLHS = std::get<0>(MatchInfo);
3184 Register &NewRHS = std::get<1>(MatchInfo);
3185
3186 // Helper lambda to check for opportunities for
3187 // ((0-A) + B) -> B - A
3188 // (A + (0-B)) -> A - B
3189 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3190 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3191 return false;
3192 NewLHS = MaybeNewLHS;
3193 return true;
3194 };
3195
3196 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3197}
3198
3200 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
  // Walks a chain of constant-index G_INSERT_VECTOR_ELT instructions ending
  // at MI and records, per vector lane, the register inserted into it.
  // MatchInfo is resized to the element count; lanes never written stay
  // invalid unless the chain starts from a G_BUILD_VECTOR.
  // NOTE(review): the listing line that names this function is not visible.
3201 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3202 "Invalid opcode");
3203 Register DstReg = MI.getOperand(0).getReg();
3204 LLT DstTy = MRI.getType(DstReg);
3205 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3206
3207 if (DstTy.isScalableVector())
3208 return false;
3209
3210 unsigned NumElts = DstTy.getNumElements();
3211 // If this MI is part of a sequence of insert_vec_elts, then
3212 // don't do the combine in the middle of the sequence.
3213 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3214 TargetOpcode::G_INSERT_VECTOR_ELT)
3215 return false;
3216 MachineInstr *CurrInst = &MI;
3217 MachineInstr *TmpInst;
3218 int64_t IntImm;
3219 Register TmpReg;
3220 MatchInfo.resize(NumElts);
  // Walk from MI towards the start of the chain; the first (i.e. latest)
  // write seen for a lane wins, so earlier inserts never overwrite it.
3221 while (mi_match(
3222 CurrInst->getOperand(0).getReg(), MRI,
3223 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3224 if (IntImm >= NumElts || IntImm < 0)
3225 return false;
3226 if (!MatchInfo[IntImm])
3227 MatchInfo[IntImm] = TmpReg;
3228 CurrInst = TmpInst;
3229 }
3230 // Variable index.
3231 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3232 return false;
  // A G_BUILD_VECTOR base supplies every lane that no insert overwrote
  // (its operand I is placed in lane I - 1).
3233 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3234 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3235 if (!MatchInfo[I - 1].isValid())
3236 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3237 }
3238 return true;
3239 }
3240 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3241 // overwritten, bail out.
3242 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3243 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3244}
3245
3247 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
  // Replaces MI with a G_BUILD_VECTOR of the registers in MatchInfo. Entries
  // that are still invalid are filled with a single shared undef of the
  // result's scalar type, created only if it is needed.
  // NOTE(review): the listing line that names this function is not visible.
3248 Register UndefReg;
  // Lazily creates the undef register on first request and reuses it after.
3249 auto GetUndef = [&]() {
3250 if (UndefReg)
3251 return UndefReg;
3252 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3253 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3254 return UndefReg;
3255 };
3256 for (Register &Reg : MatchInfo) {
3257 if (!Reg)
3258 Reg = GetUndef();
3259 }
3260 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3261 MI.eraseFromParent();
3262}
3263
3265 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
  // Emits a subtraction of the two registers stored in MatchInfo
  // (first - second) into MI's result register and erases MI.
  // NOTE(review): the listing line that names this function is not visible.
3266 Register SubLHS, SubRHS;
3267 std::tie(SubLHS, SubRHS) = MatchInfo;
3268 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3269 MI.eraseFromParent();
3270}
3271
3272bool CombinerHelper::matchBinopWithNegInner(Register MInner, Register Other,
3273 unsigned RootOpc, Register Dst,
3274 LLT Ty,
3275 BuildFnTy &MatchInfo) const {
3276 /// Helper function for matchBinopWithNeg: tries to match one commuted form
3277 /// of `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`.
3278 MachineInstr *InnerDef = MRI.getVRegDef(MInner);
3279 if (!InnerDef)
3280 return false;
3281
3282 unsigned InnerOpc = InnerDef->getOpcode();
3283 if (InnerOpc != TargetOpcode::G_ADD && InnerOpc != TargetOpcode::G_SUB)
3284 return false;
3285
3286 if (!MRI.hasOneNonDBGUse(MInner))
3287 return false;
3288
3289 Register InnerLHS = InnerDef->getOperand(1).getReg();
3290 Register InnerRHS = InnerDef->getOperand(2).getReg();
3291 Register NotSrc;
3292 Register B, C;
3293
3294 // Check if either operand is ~b
3295 auto TryMatch = [&](Register MaybeNot, Register Other) {
3296 if (mi_match(MaybeNot, MRI, m_Not(m_Reg(NotSrc)))) {
3297 if (!MRI.hasOneNonDBGUse(MaybeNot))
3298 return false;
3299 B = NotSrc;
3300 C = Other;
3301 return true;
3302 }
3303 return false;
3304 };
3305
3306 // For SUB, the not must be the LHS. For ADD, it can be either operand.
3307 if (!TryMatch(InnerLHS, InnerRHS) &&
3308 !(InnerOpc == TargetOpcode::G_ADD && TryMatch(InnerRHS, InnerLHS)))
3309 return false;
3310
3311 // Flip add/sub
3312 unsigned FlippedOpc = (InnerOpc == TargetOpcode::G_ADD) ? TargetOpcode::G_SUB
3313 : TargetOpcode::G_ADD;
3314
3315 Register A = Other;
3316 MatchInfo = [=](MachineIRBuilder &Builder) {
3317 auto NewInner = Builder.buildInstr(FlippedOpc, {Ty}, {B, C});
3318 auto NewNot = Builder.buildNot(Ty, NewInner);
3319 Builder.buildInstr(RootOpc, {Dst}, {A, NewNot});
3320 };
3321 return true;
3322}
3323
3325 BuildFnTy &MatchInfo) const {
3326 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
3327 // Root MI is one of G_AND, G_OR, G_XOR.
3328 // We also look for commuted forms of operations. Pattern shouldn't apply
3329 // if there are multiple uses of the inner operations.
  // NOTE(review): the listing line that names this function is not visible.
3330
3331 unsigned RootOpc = MI.getOpcode();
3332 Register Dst = MI.getOperand(0).getReg();
3333 LLT Ty = MRI.getType(Dst);
3334
3335 Register LHS = MI.getOperand(1).getReg();
3336 Register RHS = MI.getOperand(2).getReg();
3337 // Check the commuted and uncommuted forms of the operation.
3338 return matchBinopWithNegInner(LHS, RHS, RootOpc, Dst, Ty, MatchInfo) ||
3339 matchBinopWithNegInner(RHS, LHS, RootOpc, Dst, Ty, MatchInfo);
3340}
3341
3343 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3344 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3345 //
3346 // Creates the new hand + logic instruction (but does not insert them.)
3347 //
3348 // On success, MatchInfo is populated with the new instructions. These are
3349 // inserted in applyHoistLogicOpWithSameOpcodeHands.
  // NOTE(review): the listing line that names this function is not visible.
3350 unsigned LogicOpcode = MI.getOpcode();
3351 assert(LogicOpcode == TargetOpcode::G_AND ||
3352 LogicOpcode == TargetOpcode::G_OR ||
3353 LogicOpcode == TargetOpcode::G_XOR);
  // NOTE(review): this local MIB is not referenced in the visible body; the
  // lambdas below declare their own MIB parameter, which shadows it.
3354 MachineIRBuilder MIB(MI);
3355 Register Dst = MI.getOperand(0).getReg();
3356 Register LHSReg = MI.getOperand(1).getReg();
3357 Register RHSReg = MI.getOperand(2).getReg();
3358
3359 // Don't recompute anything.
3360 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3361 return false;
3362
3363 // Make sure we have (hand x, ...), (hand y, ...)
3364 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3365 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3366 if (!LeftHandInst || !RightHandInst)
3367 return false;
3368 unsigned HandOpcode = LeftHandInst->getOpcode();
3369 if (HandOpcode != RightHandInst->getOpcode())
3370 return false;
  // Both hand instructions need a register as operand 1 (x and y below).
3371 if (LeftHandInst->getNumOperands() < 2 ||
3372 !LeftHandInst->getOperand(1).isReg() ||
3373 RightHandInst->getNumOperands() < 2 ||
3374 !RightHandInst->getOperand(1).isReg())
3375 return false;
3376
3377 // Make sure the types match up, and if we're doing this post-legalization,
3378 // we end up with legal types.
3379 Register X = LeftHandInst->getOperand(1).getReg();
3380 Register Y = RightHandInst->getOperand(1).getReg();
3381 LLT XTy = MRI.getType(X);
3382 LLT YTy = MRI.getType(Y);
3383 if (!XTy.isValid() || XTy != YTy)
3384 return false;
3385
3386 // Optional extra source register.
3387 Register ExtraHandOpSrcReg;
3388 switch (HandOpcode) {
3389 default:
3390 return false;
3391 case TargetOpcode::G_ANYEXT:
3392 case TargetOpcode::G_SEXT:
3393 case TargetOpcode::G_ZEXT: {
3394 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3395 break;
3396 }
3397 case TargetOpcode::G_TRUNC: {
3398 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3399 const MachineFunction *MF = MI.getMF();
3400 LLVMContext &Ctx = MF->getFunction().getContext();
3401
3402 LLT DstTy = MRI.getType(Dst);
3403 const TargetLowering &TLI = getTargetLowering();
3404
3405 // Be extra careful sinking truncate. If it's free, there's no benefit in
3406 // widening a binop.
3407 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3408 return false;
3409 break;
3410 }
3411 case TargetOpcode::G_AND:
3412 case TargetOpcode::G_ASHR:
3413 case TargetOpcode::G_LSHR:
3414 case TargetOpcode::G_SHL: {
3415 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3416 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3417 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3418 return false;
3419 ExtraHandOpSrcReg = ZOp.getReg();
3420 break;
3421 }
3422 }
3423
  // The hoisted logic op operates on the hand instructions' source type.
3424 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3425 return false;
3426
3427 // Record the steps to build the new instructions.
3428 //
3429 // Steps to build (logic x, y)
3430 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3431 OperandBuildSteps LogicBuildSteps = {
3432 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3433 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3434 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3435 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3436
3437 // Steps to build hand (logic x, y), ...z
3438 OperandBuildSteps HandBuildSteps = {
3439 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3440 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3441 if (ExtraHandOpSrcReg.isValid())
3442 HandBuildSteps.push_back(
3443 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3444 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3445
3446 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3447 return true;
3448}
3449
3451 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
  // Emits every instruction recorded in MatchInfo.InstrsToBuild, in order:
  // each is created from its opcode and then populated by running its operand
  // callbacks. MI is erased afterwards.
  // NOTE(review): the listing line that names this function is not visible.
3452 assert(MatchInfo.InstrsToBuild.size() &&
3453 "Expected at least one instr to build?");
3454 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3455 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3456 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3457 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3458 for (auto &OperandFn : InstrToBuild.OperandFns)
3459 OperandFn(Instr);
3460 }
3461 MI.eraseFromParent();
3462}
3463
3465 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
  // Matches G_ASHR (G_SHL Src, C), C where both shift amounts are the same
  // constant (or constant splat). On success MatchInfo holds {Src, C}.
  // NOTE(review): the listing line that names this function is not visible.
3466 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3467 int64_t ShlCst, AshrCst;
3468 Register Src;
3469 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3470 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3471 m_ICstOrSplat(AshrCst))))
3472 return false;
3473 if (ShlCst != AshrCst)
3474 return false;
  // NOTE(review): listing line 3475, which opens this condition, is missing
  // from this view. The visible operand describes G_SEXT_INREG on Src's type
  // — presumably a legality query; confirm against the upstream file.
3476 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3477 return false;
3478 MatchInfo = std::make_tuple(Src, ShlCst);
3479 return true;
3480}
3481
3483 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
  // Replaces the G_ASHR with a G_SEXT_INREG of the register stored in
  // MatchInfo, then erases MI.
  // NOTE(review): the listing line that names this function is not visible.
3484 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3485 Register Src;
3486 int64_t ShiftAmt;
3487 std::tie(Src, ShiftAmt) = MatchInfo;
3488 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
  // Shifting left then arithmetically right by ShiftAmt keeps the low
  // (Size - ShiftAmt) bits and sign-extends from there.
3489 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3490 MI.eraseFromParent();
3491}
3492
3493/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
// NOTE(review): listing lines 3494-3495 (the function name and its first
// parameter) are missing from this view.
3496 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3497 assert(MI.getOpcode() == TargetOpcode::G_AND);
3498
3499 Register Dst = MI.getOperand(0).getReg();
3500 LLT Ty = MRI.getType(Dst);
3501
3502 Register R;
3503 int64_t C1;
3504 int64_t C2;
3505 if (!mi_match(
3506 Dst, MRI,
3507 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3508 return false;
3509
  // The callback either emits a single G_AND with the merged mask, or, when
  // the masks share no bit, builds a zero constant and rewrites Dst to it.
3510 MatchInfo = [=](MachineIRBuilder &B) {
3511 if (C1 & C2) {
3512 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3513 return;
3514 }
3515 auto Zero = B.buildConstant(Ty, 0);
3516 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3517 };
3518 return true;
3519}
3520
3522 Register &Replacement) const {
  // On success Replacement is set to whichever G_AND operand the result is
  // known to equal. Returns false when no value tracking (VT) is available.
  // NOTE(review): the listing line that names this function is not visible.
3523 // Given
3524 //
3525 // %y:_(sN) = G_SOMETHING
3526 // %x:_(sN) = G_SOMETHING
3527 // %res:_(sN) = G_AND %x, %y
3528 //
3529 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3530 //
3531 // Patterns like this can appear as a result of legalization. E.g.
3532 //
3533 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3534 // %one:_(s32) = G_CONSTANT i32 1
3535 // %and:_(s32) = G_AND %cmp, %one
3536 //
3537 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3538 assert(MI.getOpcode() == TargetOpcode::G_AND);
3539 if (!VT)
3540 return false;
3541
3542 Register AndDst = MI.getOperand(0).getReg();
3543 Register LHS = MI.getOperand(1).getReg();
3544 Register RHS = MI.getOperand(2).getReg();
3545
3546 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3547 // we can't do anything. If we do, then it depends on whether we have
3548 // KnownBits on the LHS.
3549 KnownBits RHSBits = VT->getKnownBits(RHS);
3550 if (RHSBits.isUnknown())
3551 return false;
3552
3553 KnownBits LHSBits = VT->getKnownBits(LHS);
3554
3555 // Check that x & Mask == x.
3556 // x & 1 == x, always
3557 // x & 0 == x, only if x is also 0
3558 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3559 //
3560 // Check if we can replace AndDst with the LHS of the G_AND
3561 if (canReplaceReg(AndDst, LHS, MRI) &&
3562 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3563 Replacement = LHS;
3564 return true;
3565 }
3566
3567 // Check if we can replace AndDst with the RHS of the G_AND
3568 if (canReplaceReg(AndDst, RHS, MRI) &&
3569 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3570 Replacement = RHS;
3571 return true;
3572 }
3573
3574 return false;
3575}
3576
3578 Register &Replacement) const {
  // On success Replacement is set to whichever G_OR operand the result is
  // known to equal. Returns false when no value tracking (VT) is available.
  // NOTE(review): the listing line that names this function is not visible.
3579 // Given
3580 //
3581 // %y:_(sN) = G_SOMETHING
3582 // %x:_(sN) = G_SOMETHING
3583 // %res:_(sN) = G_OR %x, %y
3584 //
3585 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3586 assert(MI.getOpcode() == TargetOpcode::G_OR);
3587 if (!VT)
3588 return false;
3589
3590 Register OrDst = MI.getOperand(0).getReg();
3591 Register LHS = MI.getOperand(1).getReg();
3592 Register RHS = MI.getOperand(2).getReg();
3593
3594 KnownBits LHSBits = VT->getKnownBits(LHS);
3595 KnownBits RHSBits = VT->getKnownBits(RHS);
3596
3597 // Check that x | Mask == x.
3598 // x | 0 == x, always
3599 // x | 1 == x, only if x is also 1
3600 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3601 //
3602 // Check if we can replace OrDst with the LHS of the G_OR
3603 if (canReplaceReg(OrDst, LHS, MRI) &&
3604 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3605 Replacement = LHS;
3606 return true;
3607 }
3608
3609 // Check if we can replace OrDst with the RHS of the G_OR
3610 if (canReplaceReg(OrDst, RHS, MRI) &&
3611 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3612 Replacement = RHS;
3613 return true;
3614 }
3615
3616 return false;
3617}
3618
// NOTE(review): the signature line of this function is missing from the listing.
3620 // If the input is already sign extended, just drop the extension.
3621 Register Src = MI.getOperand(1).getReg();
3622 unsigned ExtBits = MI.getOperand(2).getImm();
3623 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
  // Sign-extending from ExtBits is a no-op once at least
  // (TypeSize - ExtBits + 1) top bits are already copies of the sign bit.
3624 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3625}
3626
3627static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3628 int64_t Cst, bool IsVector, bool IsFP) {
3629 // For i1, Cst will always be -1 regardless of boolean contents.
3630 return (ScalarSizeBits == 1 && Cst == -1) ||
3631 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3632}
3633
3634// This pattern aims to match the following shape to avoid extra mov
3635// instructions
3636// G_BUILD_VECTOR(
3637// G_UNMERGE_VALUES(src, 0)
3638// G_UNMERGE_VALUES(src, 1)
3639// G_IMPLICIT_DEF
3640// G_IMPLICIT_DEF
3641// )
3642// ->
3643// G_CONCAT_VECTORS(
3644// src,
3645// undef
3646// )
3649 Register &UnmergeSrc) const {
  // Matches a G_BUILD_VECTOR whose first half of sources are, in order, all
  // results of one G_UNMERGE_VALUES of a vector and whose second half are all
  // G_IMPLICIT_DEF. On a match UnmergeSrc holds the unmerge's source register
  // (it is also written on some paths that later return false).
  // NOTE(review): listing lines 3647-3648 (the function name and leading
  // parameters, including where IsPreLegalize comes from) are missing here.
3650 auto &BV = cast<GBuildVector>(MI);
3651
3652 unsigned BuildUseCount = BV.getNumSources();
3653 if (BuildUseCount % 2 != 0)
3654 return false;
3655
3656 unsigned NumUnmerge = BuildUseCount / 2;
3657
3658 auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
3659
3660 // Check the first operand is an unmerge and has the correct number of
3661 // operands
3662 if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
3663 return false;
3664
3665 UnmergeSrc = Unmerge->getSourceReg();
3666
3667 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3668 LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
3669
3670 if (!UnmergeSrcTy.isVector())
3671 return false;
3672
3673 // Ensure we only generate legal instructions post-legalizer
3674 if (!IsPreLegalize &&
3675 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
3676 return false;
3677
3678 // Check that all of the operands before the midpoint come from the same
3679 // unmerge and are in the same order as they are used in the build_vector
3680 for (unsigned I = 0; I < NumUnmerge; ++I) {
3681 auto MaybeUnmergeReg = BV.getSourceReg(I);
3682 auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
3683
3684 if (!LoopUnmerge || LoopUnmerge != Unmerge)
3685 return false;
3686
3687 if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
3688 return false;
3689 }
3690
3691 // Check that all of the unmerged values are used
  // NOTE(review): this repeats the getNumDefs() test already made before the
  // loop, so it cannot fail here.
3692 if (Unmerge->getNumDefs() != NumUnmerge)
3693 return false;
3694
3695 // Check that all of the operands after the mid point are undefs.
3696 for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
  // NOTE(review): the pointer returned by getDefIgnoringCopies is used
  // without a null check — confirm it cannot be null for these operands.
3697 auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
3698
3699 if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
3700 return false;
3701 }
3702
3703 return true;
3704}
3705
3709 Register &UnmergeSrc) const {
  // Replaces MI with a G_CONCAT_VECTORS of UnmergeSrc and an undef vector of
  // the same type, then erases MI.
  // NOTE(review): listing lines 3706-3708 (the function name and leading
  // parameters, including B) are missing from this view.
3710 assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
3711 B.setInstrAndDebugLoc(MI);
3712
3713 Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
3714 B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
3715
3716 MI.eraseFromParent();
3717}
3718
3719// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3720// using vector truncates instead
3721//
3722// EXAMPLE:
3723// %a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)
3724// %T_a(i16) = G_TRUNC %a(i32)
3725// %T_b(i16) = G_TRUNC %b(i32)
3726// %Undef(i16) = G_IMPLICIT_DEF(i16)
3727// %dst(<4 x i16>) = G_BUILD_VECTOR %T_a(i16), %T_b(i16), %Undef(i16), %Undef(i16)
3728//
3729// ===>
3730// %Undef(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)
3731// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Undef(<2 x i32>)
3732// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3733//
3734// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3736 Register &MatchInfo) const {
  // Matches a G_BUILD_VECTOR whose sources are at least two G_TRUNCs of the
  // results of a single G_UNMERGE_VALUES (in result order) followed only by
  // G_IMPLICIT_DEFs. MatchInfo receives the unmerge's source register.
  // NOTE(review): the listing line that names this function (and shows where
  // IsPreLegalize comes from) is not visible.
3737 auto BuildMI = cast<GBuildVector>(&MI);
3738 unsigned NumOperands = BuildMI->getNumSources();
3739 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3740
3741 // Check the G_BUILD_VECTOR sources
3742 unsigned I;
3743 MachineInstr *UnmergeMI = nullptr;
3744
3745 // Check all source TRUNCs come from the same UNMERGE instruction
3746 // and that the element order matches (BUILD_VECTOR position I
3747 // corresponds to UNMERGE result I)
3748 for (I = 0; I < NumOperands; ++I) {
3749 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3750 auto SrcMIOpc = SrcMI->getOpcode();
3751
3752 // Check if the G_TRUNC instructions all come from the same MI
3753 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3754 Register TruncSrcReg = SrcMI->getOperand(1).getReg();
3755 if (!UnmergeMI) {
3756 UnmergeMI = MRI.getVRegDef(TruncSrcReg);
3757 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3758 return false;
3759 } else {
3760 auto UnmergeSrcMI = MRI.getVRegDef(TruncSrcReg);
3761 if (UnmergeMI != UnmergeSrcMI)
3762 return false;
3763 }
3764 // Verify element ordering: BUILD_VECTOR position I must use
3765 // UNMERGE result I, otherwise the fold would lose element reordering
3766 if (UnmergeMI->getOperand(I).getReg() != TruncSrcReg)
3767 return false;
3768 } else {
3769 break;
3770 }
3771 }
  // Require at least two leading G_TRUNC sources.
3772 if (I < 2)
3773 return false;
3774
3775 // Check the remaining source elements are only G_IMPLICIT_DEF
3776 for (; I < NumOperands; ++I) {
3777 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3778 auto SrcMIOpc = SrcMI->getOpcode();
3779
3780 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3781 return false;
3782 }
3783
3784 // Check the size of unmerge source
  // NOTE(review): getNumElements()/getElementType() below assume the unmerge
  // source is a vector type — confirm a scalar source cannot reach here.
3785 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3786 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3787 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3788 return false;
3789
3790 // Check the unmerge source and destination element types match
3791 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3792 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3793 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3794 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3795 return false;
3796
3797 // Only generate legal instructions post-legalizer
3798 if (!IsPreLegalize) {
  // MidTy is the destination's shape with the unmerge source's element type.
3799 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3800
  // The concat is only emitted (and so only needs to be legal) when the
  // element counts differ.
3801 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3802 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3803 return false;
3804
3805 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3806 return false;
3807 }
3808
3809 return true;
3810}
3811
3813 Register &MatchInfo) const {
  // Replaces the G_BUILD_VECTOR with a vector G_TRUNC of the register in
  // MatchInfo. When the destination has more elements than that register's
  // type, the register is first padded with undef vectors through a
  // G_CONCAT_VECTORS. MI is erased afterwards.
  // NOTE(review): the listing line that names this function is not visible.
3814 Register MidReg;
3815 auto BuildMI = cast<GBuildVector>(&MI);
3816 Register DstReg = BuildMI->getReg(0);
3817 LLT DstTy = MRI.getType(DstReg);
3818 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3819 unsigned DstTyNumElt = DstTy.getNumElements();
3820 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3821
3822 // No need to pad vector if only G_TRUNC is needed
3823 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3824 MidReg = MatchInfo;
3825 } else {
  // One shared undef vector is repeated for every padding slot.
3826 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3827 SmallVector<Register> ConcatRegs = {MatchInfo};
3828 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3829 ConcatRegs.push_back(UndefReg);
3830
3831 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3832 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3833 }
3834
3835 Builder.buildTrunc(DstReg, MidReg);
3836 MI.eraseFromParent();
3837}
3838
3840 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
// Match step for a G_XOR with a "true" constant whose other operand is a
// single-use tree of G_ICMP/G_FCMP combined with G_AND/G_OR. On success
// RegsToNegate lists every tree node (root first) whose defining instruction
// has to be inverted by the apply step.
3841 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3842 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3843 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3844 Register XorSrc;
3845 Register CstReg;
3846 // We match xor(src, true) here.
3847 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3848 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3849 return false;
3850
3851 if (!MRI.hasOneNonDBGUse(XorSrc))
3852 return false;
3853
3854 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3855 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3856 // list of tree nodes to visit.
3857 RegsToNegate.push_back(XorSrc);
3858 // Remember whether the comparisons are all integer or all floating point.
3859 bool IsInt = false;
3860 bool IsFP = false;
// RegsToNegate grows while it is being walked, so size() is re-read each
// iteration.
3861 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3862 Register Reg = RegsToNegate[I];
// Every node must be single-use because its definition is rewritten in place.
3863 if (!MRI.hasOneNonDBGUse(Reg))
3864 return false;
3865 MachineInstr *Def = MRI.getVRegDef(Reg);
3866 switch (Def->getOpcode()) {
3867 default:
3868 // Don't match if the tree contains anything other than ANDs, ORs and
3869 // comparisons.
3870 return false;
3871 case TargetOpcode::G_ICMP:
// Mixing integer and floating-point compares in one tree is rejected.
3872 if (IsFP)
3873 return false;
3874 IsInt = true;
3875 // When we apply the combine we will invert the predicate.
3876 break;
3877 case TargetOpcode::G_FCMP:
3878 if (IsInt)
3879 return false;
3880 IsFP = true;
3881 // When we apply the combine we will invert the predicate.
3882 break;
3883 case TargetOpcode::G_AND:
3884 case TargetOpcode::G_OR:
3885 // Implement De Morgan's laws:
3886 // ~(x & y) -> ~x | ~y
3887 // ~(x | y) -> ~x & ~y
3888 // When we apply the combine we will change the opcode and recursively
3889 // negate the operands.
3890 RegsToNegate.push_back(Def->getOperand(1).getReg());
3891 RegsToNegate.push_back(Def->getOperand(2).getReg());
3892 break;
3893 }
3894 }
3895
3896 // Now we know whether the comparisons are integer or floating point, check
3897 // the constant in the xor.
3898 int64_t Cst;
3899 if (Ty.isVector()) {
// Vector case: the constant operand must be a splat of one integer value.
3900 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3901 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3902 if (!MaybeCst)
3903 return false;
3904 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3905 return false;
3906 } else {
3907 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3908 return false;
3909 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3910 return false;
3911 }
3912
3913 return true;
3914}
3915
3917 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
// Apply step: rewrite the defining instruction of every register in
// RegsToNegate in place, then forward MI's first source operand to its
// result and delete MI.
3918 for (Register Reg : RegsToNegate) {
3919 MachineInstr *Def = MRI.getVRegDef(Reg);
// The observer is notified around each in-place mutation.
3920 Observer.changingInstr(*Def);
3921 // For each comparison, invert the opcode. For each AND and OR, change the
3922 // opcode.
3923 switch (Def->getOpcode()) {
3924 default:
3925 llvm_unreachable("Unexpected opcode");
3926 case TargetOpcode::G_ICMP:
3927 case TargetOpcode::G_FCMP: {
3928 MachineOperand &PredOp = Def->getOperand(1);
// NOTE(review): NewP is not declared in the visible lines; its definition
// (presumably the inverse of PredOp's predicate) appears to have been lost
// from this listing. Verify against the upstream file.
3931 PredOp.setPredicate(NewP);
3932 break;
3933 }
3934 case TargetOpcode::G_AND:
3935 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3936 break;
3937 case TargetOpcode::G_OR:
3938 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3939 break;
3940 }
3941 Observer.changedInstr(*Def);
3942 }
3943
// The rewritten tree root (operand 1 of MI) now takes the place of MI's result.
3944 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3945 MI.eraseFromParent();
3946}
3947
3949 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3950 // Match (xor (and x, y), y) (or any of its commuted cases)
// On success MatchInfo.first is x and MatchInfo.second is y, where y is the
// register shared between the G_AND and the G_XOR.
3951 assert(MI.getOpcode() == TargetOpcode::G_XOR);
// X and Y alias the MatchInfo members, so the pattern match writes straight
// into the output.
3952 Register &X = MatchInfo.first;
3953 Register &Y = MatchInfo.second;
3954 Register AndReg = MI.getOperand(1).getReg();
3955 Register SharedReg = MI.getOperand(2).getReg();
3956
3957 // Find a G_AND on either side of the G_XOR.
3958 // Look for one of
3959 //
3960 // (xor (and x, y), SharedReg)
3961 // (xor SharedReg, (and x, y))
3962 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3963 std::swap(AndReg, SharedReg);
3964 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3965 return false;
3966 }
3967
3968 // Only do this if we'll eliminate the G_AND.
3969 if (!MRI.hasOneNonDBGUse(AndReg))
3970 return false;
3971
3972 // We can combine if SharedReg is the same as either the LHS or RHS of the
3973 // G_AND.
// Normalise so that Y is the shared operand; if neither operand is shared the
// final comparison fails.
3974 if (Y != SharedReg)
3975 std::swap(X, Y);
3976 return Y == SharedReg;
3977}
3978
3980 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3981 // Fold (xor (and x, y), y) -> (and (not x), y)
// MI is mutated in place: its opcode becomes G_AND and both source operands
// are replaced.
3982 Register X, Y;
3983 std::tie(X, Y) = MatchInfo;
// The NOT is built before MI is touched, using X's own type.
3984 auto Not = Builder.buildNot(MRI.getType(X), X);
3985 Observer.changingInstr(MI);
3986 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3987 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3988 MI.getOperand(2).setReg(Y);
3989 Observer.changedInstr(MI);
3990}
3991
// Match step: succeeds when the base operand of the G_PTR_ADD is a constant
// zero (scalar pointer) or an all-zeros build vector (vector of pointers),
// and the result's address space is not non-integral.
3993 auto &PtrAdd = cast<GPtrAdd>(MI);
3994 Register DstReg = PtrAdd.getReg(0);
3995 LLT Ty = MRI.getType(DstReg);
3996 const DataLayout &DL = Builder.getMF().getDataLayout();
3997
// Pointers in non-integral address spaces are never matched.
3998 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3999 return false;
4000
4001 if (Ty.isPointer()) {
4002 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
4003 return ConstVal && *ConstVal == 0;
4004 }
4005
4006 assert(Ty.isVector() && "Expecting a vector type");
4007 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
4008 return isBuildVectorAllZeros(*VecMI, MRI);
4009}
4010
// Apply step: replace the G_PTR_ADD with a G_INTTOPTR of its offset operand,
// writing to the same destination register, and erase the original.
4012 auto &PtrAdd = cast<GPtrAdd>(MI);
4013 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
4014 PtrAdd.eraseFromParent();
4015}
4016
4017 /// The second source operand is known to be a power of 2.
/// Replaces MI with (and Src0, (add Pow2Src1, -1)) written to MI's original
/// destination register, then erases MI.
4019 Register DstReg = MI.getOperand(0).getReg();
4020 Register Src0 = MI.getOperand(1).getReg();
4021 Register Pow2Src1 = MI.getOperand(2).getReg();
4022 LLT Ty = MRI.getType(DstReg);
4023
4024 // Fold (urem x, pow2) -> (and x, pow2-1)
// pow2-1 is formed as an add of -1 because Pow2Src1 is a register, not
// necessarily a literal constant.
4025 auto NegOne = Builder.buildConstant(Ty, -1);
4026 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
4027 Builder.buildAnd(DstReg, Src0, Add);
4028 MI.eraseFromParent();
4029}
4030
4032 unsigned &SelectOpNo) const {
// Match step: looks for a single-use G_SELECT of two constants feeding
// operand 1 or operand 2 of the binary operator MI. SelectOpNo is set to the
// operand index (1 or 2) that holds the select.
4033 Register LHS = MI.getOperand(1).getReg();
4034 Register RHS = MI.getOperand(2).getReg();
4035
// Try the LHS as the select first; the RHS is the fallback.
4036 Register OtherOperandReg = RHS;
4037 SelectOpNo = 1;
4038 MachineInstr *Select = MRI.getVRegDef(LHS);
4039
4040 // Don't do this unless the old select is going away. We want to eliminate the
4041 // binary operator, not replace a binop with a select.
4042 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
4043 !MRI.hasOneNonDBGUse(LHS)) {
4044 OtherOperandReg = LHS;
4045 SelectOpNo = 2;
4046 Select = MRI.getVRegDef(RHS);
4047 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
4048 !MRI.hasOneNonDBGUse(RHS))
4049 return false;
4050 }
4051
// Operands 2 and 3 of the select are its true/false values; both must be
// (non-opaque) constants or constant vectors.
4052 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
4053 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
4054
4055 if (!isConstantOrConstantVector(*SelectLHS, MRI,
4056 /*AllowFP*/ true,
4057 /*AllowOpaqueConstants*/ false))
4058 return false;
4059 if (!isConstantOrConstantVector(*SelectRHS, MRI,
4060 /*AllowFP*/ true,
4061 /*AllowOpaqueConstants*/ false))
4062 return false;
4063
4064 unsigned BinOpcode = MI.getOpcode();
4065
4066 // We know that one of the operands is a select of constants. Now verify that
4067 // the other binary operator operand is either a constant, or we can handle a
4068 // variable.
// For G_AND/G_OR a non-constant other operand is accepted when both select
// arms are zero or all-ones (or splats of those).
4069 bool CanFoldNonConst =
4070 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
4071 (isNullOrNullSplat(*SelectLHS, MRI) ||
4072 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
4073 (isNullOrNullSplat(*SelectRHS, MRI) ||
4074 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
4075 if (CanFoldNonConst)
4076 return true;
4077
4078 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
4079 /*AllowFP*/ true,
4080 /*AllowOpaqueConstants*/ false);
4081}
4082
4083 /// \p SelectOperand is the operand in binary operator \p MI that is the select
4084 /// to fold.
/// Builds the binary operation once per select arm, then replaces \p MI with
/// a G_SELECT over the two results that keeps the original condition and
/// \p MI's flags.
4086 MachineInstr &MI, const unsigned &SelectOperand) const {
4087 Register Dst = MI.getOperand(0).getReg();
4088 Register LHS = MI.getOperand(1).getReg();
4089 Register RHS = MI.getOperand(2).getReg();
4090 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
4091
4092 Register SelectCond = Select->getOperand(1).getReg();
4093 Register SelectTrue = Select->getOperand(2).getReg();
4094 Register SelectFalse = Select->getOperand(3).getReg();
4095
4096 LLT Ty = MRI.getType(Dst);
4097 unsigned BinOpcode = MI.getOpcode();
4098
4099 Register FoldTrue, FoldFalse;
4100
4101 // We have a select-of-constants followed by a binary operator with a
4102 // constant. Eliminate the binop by pulling the constant math into the select.
4103 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
// The operand order of the original binop is preserved in both branches, so
// non-commutative opcodes keep their meaning.
4104 if (SelectOperand == 1) {
4105 // TODO: SelectionDAG verifies this actually constant folds before
4106 // committing to the combine.
4107
4108 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
4109 FoldFalse =
4110 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
4111 } else {
4112 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
4113 FoldFalse =
4114 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
4115 }
4116
4117 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
4118 MI.eraseFromParent();
4119}
4120
4121std::optional<SmallVector<Register, 8>>
4122CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
4123 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
4124 // We want to detect if Root is part of a tree which represents a bunch
4125 // of loads being merged into a larger load. We'll try to recognize patterns
4126 // like, for example:
4127 //
4128 // Reg Reg
4129 // \ /
4130 // OR_1 Reg
4131 // \ /
4132 // OR_2
4133 // \ Reg
4134 // .. /
4135 // Root
4136 //
4137 // Reg Reg Reg Reg
4138 // \ / \ /
4139 // OR_1 OR_2
4140 // \ /
4141 // \ /
4142 // ...
4143 // Root
4144 //
4145 // Each "Reg" may have been produced by a load + some arithmetic. This
4146 // function will save each of them.
4147 SmallVector<Register, 8> RegsToVisit;
4149
4150 // In the "worst" case, we're dealing with a load for each byte. So, there
4151 // are at most #bytes - 1 ORs.
4152 const unsigned MaxIter =
4153 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
4154 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
4155 if (Ors.empty())
4156 break;
4157 const MachineInstr *Curr = Ors.pop_back_val();
4158 Register OrLHS = Curr->getOperand(1).getReg();
4159 Register OrRHS = Curr->getOperand(2).getReg();
4160
4161 // In the combine, we want to elimate the entire tree.
4162 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
4163 return std::nullopt;
4164
4165 // If it's a G_OR, save it and continue to walk. If it's not, then it's
4166 // something that may be a load + arithmetic.
4167 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
4168 Ors.push_back(Or);
4169 else
4170 RegsToVisit.push_back(OrLHS);
4171 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
4172 Ors.push_back(Or);
4173 else
4174 RegsToVisit.push_back(OrRHS);
4175 }
4176
4177 // We're going to try and merge each register into a wider power-of-2 type,
4178 // so we ought to have an even number of registers.
4179 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
4180 return std::nullopt;
4181 return RegsToVisit;
4182}
4183
4184/// Helper function for findLoadOffsetsForLoadOrCombine.
4185///
4186/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
4187/// and then moving that value into a specific byte offset.
4188///
4189/// e.g. x[i] << 24
4190///
4191/// \returns The load instruction and the byte offset it is moved into.
4192static std::optional<std::pair<GZExtLoad *, int64_t>>
4193matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
4194 const MachineRegisterInfo &MRI) {
4195 assert(MRI.hasOneNonDBGUse(Reg) &&
4196 "Expected Reg to only have one non-debug use?");
4197 Register MaybeLoad;
4198 int64_t Shift;
4199 if (!mi_match(Reg, MRI,
4200 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
4201 Shift = 0;
4202 MaybeLoad = Reg;
4203 }
4204
4205 if (Shift % MemSizeInBits != 0)
4206 return std::nullopt;
4207
4208 // TODO: Handle other types of loads.
4209 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
4210 if (!Load)
4211 return std::nullopt;
4212
4213 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4214 return std::nullopt;
4215
4216 return std::make_pair(Load, Shift / MemSizeInBits);
4217}
4218
// For every register in RegsToVisit, find the narrow load behind it and the
// position it is shifted into, checking that all loads share one basic block,
// address space and base pointer, and that no index or destination position
// repeats. Returns (load with the lowest index, that lowest index, latest
// load in instruction order), or std::nullopt when any check fails or a load
// fold barrier lies between the earliest and latest load.
// NOTE(review): MemOffset2Idx and BasePtr are used below but not declared in
// the visible lines (a parameter line and a local declaration appear to be
// missing from this listing) - verify against the upstream file.
4219 std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4220 CombinerHelper::findLoadOffsetsForLoadOrCombine(
4222 const SmallVector<Register, 8> &RegsToVisit,
4223 const unsigned MemSizeInBits) const {
4224
4225 // Each load found for the pattern. There should be one for each RegsToVisit.
4226 SmallSetVector<const MachineInstr *, 8> Loads;
4227
4228 // The lowest index used in any load. (The lowest "i" for each x[i].)
4229 int64_t LowestIdx = INT64_MAX;
4230
4231 // The load which uses the lowest index.
4232 GZExtLoad *LowestIdxLoad = nullptr;
4233
4234 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4235 SmallSet<int64_t, 8> SeenIdx;
4236
4237 // Ensure each load is in the same MBB.
4238 // TODO: Support multiple MachineBasicBlocks.
4239 MachineBasicBlock *MBB = nullptr;
4240 const MachineMemOperand *MMO = nullptr;
4241
4242 // Earliest instruction-order load in the pattern.
4243 GZExtLoad *EarliestLoad = nullptr;
4244
4245 // Latest instruction-order load in the pattern.
4246 GZExtLoad *LatestLoad = nullptr;
4247
4248 // Base pointer which every load should share.
4250
4251 // We want to find a load for each register. Each load should have some
4252 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4253 // track of the load which uses the lowest index. Later, we will check if we
4254 // can use its pointer in the final, combined load.
4255 for (auto Reg : RegsToVisit) {
4256 // Find the load, and find the position that it will end up in (e.g. a
4257 // shifted) value.
4258 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4259 if (!LoadAndPos)
4260 return std::nullopt;
4261 GZExtLoad *Load;
4262 int64_t DstPos;
4263 std::tie(Load, DstPos) = *LoadAndPos;
4264
4265 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4266 // it is difficult to check for stores/calls/etc between loads.
// The first load seen fixes the block every later load is compared against.
4267 MachineBasicBlock *LoadMBB = Load->getParent();
4268 if (!MBB)
4269 MBB = LoadMBB;
4270 if (LoadMBB != MBB)
4271 return std::nullopt;
4272
4273 // Make sure that the MachineMemOperands of every seen load are compatible.
// Only the address space is compared here, against the first load's MMO.
4274 auto &LoadMMO = Load->getMMO();
4275 if (!MMO)
4276 MMO = &LoadMMO;
4277 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4278 return std::nullopt;
4279
4280 // Find out what the base pointer and index for the load is.
// A pointer that is not a G_PTR_ADD with a constant offset is treated as
// base + 0.
4281 Register LoadPtr;
4282 int64_t Idx;
4283 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4284 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4285 LoadPtr = Load->getOperand(1).getReg();
4286 Idx = 0;
4287 }
4288
4289 // Don't combine things like a[i], a[i] -> a bigger load.
4290 if (!SeenIdx.insert(Idx).second)
4291 return std::nullopt;
4292
4293 // Every load must share the same base pointer; don't combine things like:
4294 //
4295 // a[i], b[i + 1] -> a bigger load.
4296 if (!BasePtr.isValid())
4297 BasePtr = LoadPtr;
4298 if (BasePtr != LoadPtr)
4299 return std::nullopt;
4300
4301 if (Idx < LowestIdx) {
4302 LowestIdx = Idx;
4303 LowestIdxLoad = Load;
4304 }
4305
4306 // Keep track of the byte offset that this load ends up at. If we have seen
4307 // the byte offset, then stop here. We do not want to combine:
4308 //
4309 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4310 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4311 return std::nullopt;
4312 Loads.insert(Load);
4313
4314 // Keep track of the position of the earliest/latest loads in the pattern.
4315 // We will check that there are no load fold barriers between them later
4316 // on.
4317 //
4318 // FIXME: Is there a better way to check for load fold barriers?
4319 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4320 EarliestLoad = Load;
4321 if (!LatestLoad || dominates(*LatestLoad, *Load))
4322 LatestLoad = Load;
4323 }
4324
4325 // We found a load for each register. Let's check if each load satisfies the
4326 // pattern.
4327 assert(Loads.size() == RegsToVisit.size() &&
4328 "Expected to find a load for each register?");
4329 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4330 LatestLoad && "Expected at least two loads?");
4331
4332 // Check if there are any stores, calls, etc. between any of the loads. If
4333 // there are, then we can't safely perform the combine.
4334 //
4335 // MaxIter is chosen based off the (worst case) number of iterations it
4336 // typically takes to succeed in the LLVM test suite plus some padding.
4337 //
4338 // FIXME: Is there a better way to check for load fold barriers?
4339 const unsigned MaxIter = 20;
4340 unsigned Iter = 0;
4341 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4342 LatestLoad->getIterator())) {
// Loads that belong to the pattern are skipped and do not count towards the
// iteration budget.
4343 if (Loads.count(&MI))
4344 continue;
4345 if (MI.isLoadFoldBarrier())
4346 return std::nullopt;
// Give up once more than MaxIter unrelated instructions have been scanned.
4347 if (Iter++ == MaxIter)
4348 return std::nullopt;
4349 }
4350
4351 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4352}
4353
4356 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match step for a scalar G_OR that combines several narrow loads. On success
// MatchInfo holds a builder callback that emits one wide load (followed by a
// G_BSWAP when the byte order of the pattern differs from the target's).
// NOTE(review): several names used below (MemOffset2Idx, C) and the call that
// the argument list at listing line 4445 belongs to are not visible in this
// listing; lines appear to be missing - verify against the upstream file.
4357 assert(MI.getOpcode() == TargetOpcode::G_OR);
4358 MachineFunction &MF = *MI.getMF();
4359 // Assuming a little-endian target, transform:
4360 // s8 *a = ...
4361 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4362 // =>
4363 // s32 val = *((i32)a)
4364 //
4365 // s8 *a = ...
4366 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4367 // =>
4368 // s32 val = BSWAP(*((s32)a))
4369 Register Dst = MI.getOperand(0).getReg();
4370 LLT Ty = MRI.getType(Dst);
4371 if (Ty.isVector())
4372 return false;
4373
4374 // We need to combine at least two loads into this type. Since the smallest
4375 // possible load is into a byte, we need at least a 16-bit wide type.
4376 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4377 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4378 return false;
4379
4380 // Match a collection of non-OR instructions in the pattern.
4381 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4382 if (!RegsToVisit)
4383 return false;
4384
4385 // We have a collection of non-OR instructions. Figure out how wide each of
4386 // the small loads should be based off of the number of potential loads we
4387 // found.
4388 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4389 if (NarrowMemSizeInBits % 8 != 0)
4390 return false;
4391
4392 // Check if each register feeding into each OR is a load from the same
4393 // base pointer + some arithmetic.
4394 //
4395 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4396 //
4397 // Also verify that each of these ends up putting a[i] into the same memory
4398 // offset as a load into a wide type would.
4400 GZExtLoad *LowestIdxLoad, *LatestLoad;
4401 int64_t LowestIdx;
4402 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4403 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4404 if (!MaybeLoadInfo)
4405 return false;
4406 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4407
4408 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4409 // we found before, check if this corresponds to a big or little endian byte
4410 // pattern. If it does, then we can represent it using a load + possibly a
4411 // BSWAP.
4412 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4413 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4414 if (!IsBigEndian)
4415 return false;
// A byte swap is required exactly when the pattern's byte order and the
// target's byte order disagree.
4416 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4417 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4418 return false;
4419
4420 // Make sure that the load from the lowest index produces offset 0 in the
4421 // final value.
4422 //
4423 // This ensures that we won't combine something like this:
4424 //
4425 // load x[i] -> byte 2
4426 // load x[i+1] -> byte 0 ---> wide_load x[i]
4427 // load x[i+2] -> byte 1
4428 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4429 const unsigned ZeroByteOffset =
4430 *IsBigEndian
4431 ? bigEndianByteAt(NumLoadsInTy, 0)
4432 : littleEndianByteAt(NumLoadsInTy, 0);
4433 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4434 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4435 ZeroOffsetIdx->second != LowestIdx)
4436 return false;
4437
4438 // We will reuse the pointer from the load which ends up at byte offset 0. It
4439 // may not use index 0.
4440 Register Ptr = LowestIdxLoad->getPointerReg();
4441 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
// The legality query describes the wide access: the narrow load's memory
// descriptor with its memory type replaced by Ty.
4442 LegalityQuery::MemDesc MMDesc(MMO);
4443 MMDesc.MemoryTy = Ty;
4445 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4446 return false;
4447 auto PtrInfo = MMO.getPointerInfo();
4448 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4449
4450 // Load must be allowed and fast on the target.
4452 auto &DL = MF.getDataLayout();
4453 unsigned Fast = 0;
4454 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4455 !Fast)
4456 return false;
4457
// The callback captures everything by value and inserts the new instructions
// at the latest load of the pattern.
4458 MatchInfo = [=](MachineIRBuilder &MIB) {
4459 MIB.setInstrAndDebugLoc(*LatestLoad);
4460 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4461 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4462 if (NeedsBSwap)
4463 MIB.buildBSwap(Dst, LoadDst);
4464 };
4465 return true;
4466}
4467
4469 MachineInstr *&ExtMI) const {
// Match step: succeeds when the scalar G_PHI result has exactly one non-debug
// user, that user is an extend (returned in ExtMI), and every incoming value
// is defined by one of a small set of opcodes.
// NOTE(review): InSrcs is used below but its declaration is not visible in
// this listing - verify against the upstream file.
4470 auto &PHI = cast<GPhi>(MI);
4471 Register DstReg = PHI.getReg(0);
4472
4473 // TODO: Extending a vector may be expensive, don't do this until heuristics
4474 // are better.
4475 if (MRI.getType(DstReg).isVector())
4476 return false;
4477
4478 // Try to match a phi, whose only use is an extend.
4479 if (!MRI.hasOneNonDBGUse(DstReg))
4480 return false;
4481 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4482 switch (ExtMI->getOpcode()) {
4483 case TargetOpcode::G_ANYEXT:
4484 return true; // G_ANYEXT is usually free.
4485 case TargetOpcode::G_ZEXT:
4486 case TargetOpcode::G_SEXT:
4487 break;
4488 default:
4489 return false;
4490 }
4491
4492 // If the target is likely to fold this extend away, don't propagate.
4493 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4494 return false;
4495
4496 // We don't want to propagate the extends unless there's a good chance that
4497 // they'll be optimized in some way.
4498 // Collect the unique incoming values.
4500 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
// Copies are looked through when finding the defining instruction.
4501 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4502 switch (DefMI->getOpcode()) {
4503 case TargetOpcode::G_LOAD:
4504 case TargetOpcode::G_TRUNC:
4505 case TargetOpcode::G_SEXT:
4506 case TargetOpcode::G_ZEXT:
4507 case TargetOpcode::G_ANYEXT:
4508 case TargetOpcode::G_CONSTANT:
4509 InSrcs.insert(DefMI);
4510 // Don't try to propagate if there are too many places to create new
4511 // extends, chances are it'll increase code size.
4512 if (InSrcs.size() > 2)
4513 return false;
4514 break;
4515 default:
4516 return false;
4517 }
4518 }
4519 return true;
4520}
4521
4523 MachineInstr *&ExtMI) const {
// Apply step: build a copy of the extend after each distinct incoming
// definition, create a new G_PHI over the extended values that defines the
// extend's original result register, and erase the old extend.
// NOTE(review): SrcMIs and OldToNewSrcMap are used below but their
// declarations are not visible in this listing - verify against the upstream
// file.
4524 auto &PHI = cast<GPhi>(MI);
4525 Register DstReg = ExtMI->getOperand(0).getReg();
4526 LLT ExtTy = MRI.getType(DstReg);
4527
4528 // Propagate the extension into the block of each incoming reg's block.
4529 // Use a SetVector here because PHIs can have duplicate edges, and we want
4530 // deterministic iteration order.
4533 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4534 auto SrcReg = PHI.getIncomingValue(I);
4535 auto *SrcMI = MRI.getVRegDef(SrcReg);
// Each defining instruction gets at most one new extend.
4536 if (!SrcMIs.insert(SrcMI))
4537 continue;
4538
4539 // Build an extend after each src inst.
4540 auto *MBB = SrcMI->getParent();
4541 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
// If the next instruction is a PHI, move the insertion point past the block's
// PHIs.
4542 if (InsertPt != MBB->end() && InsertPt->isPHI())
4543 InsertPt = MBB->getFirstNonPHI();
4544
4545 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4546 Builder.setDebugLoc(MI.getDebugLoc());
4547 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4548 OldToNewSrcMap[SrcMI] = NewExt;
4549 }
4550
4551 // Create a new phi with the extended inputs.
4552 Builder.setInstrAndDebugLoc(MI);
4553 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4554 NewPhi.addDef(DstReg);
// Walk the old PHI's operands after the def: block operands are copied as-is,
// register operands are swapped for the matching new extend result.
4555 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4556 if (!MO.isReg()) {
4557 NewPhi.addMBB(MO.getMBB());
4558 continue;
4559 }
4560 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4561 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4562 }
4563 Builder.insertInstr(NewPhi);
4564 ExtMI->eraseFromParent();
4565}
4566
4568 Register &Reg) const {
// Match step: for a G_EXTRACT_VECTOR_ELT with an in-range constant index
// whose source is a G_BUILD_VECTOR / G_BUILD_VECTOR_TRUNC (optionally behind
// a G_TRUNC), set Reg to the build-vector source operand at that index.
4569 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4570 // If we have a constant index, look for a G_BUILD_VECTOR source
4571 // and find the source register that the index maps to.
4572 Register SrcVec = MI.getOperand(1).getReg();
4573 LLT SrcTy = MRI.getType(SrcVec);
4574 if (SrcTy.isScalableVector())
4575 return false;
4576
4577 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
// The range check is done before the value is narrowed to unsigned below.
4578 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4579 return false;
4580
4581 unsigned VecIdx = Cst->Value.getZExtValue();
4582
4583 // Check if we have a build_vector or build_vector_trunc with an optional
4584 // trunc in front.
4585 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4586 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4587 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4588 }
4589
4590 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4591 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4592 return false;
4593
// A multi-use source vector is only accepted when the target opts in through
// aggressivelyPreferBuildVectorSources.
4594 EVT Ty(getMVTForLLT(SrcTy));
4595 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4596 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4597 return false;
4598
// Operand 0 is the def, so element VecIdx lives at operand VecIdx + 1.
4599 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4600 return true;
4601}
4602
4604 Register &Reg) const {
// Apply step: forward the scalar Reg to MI's result, inserting a G_TRUNC when
// the scalar's type differs from the destination type.
4605 // Check the type of the register, since it may have come from a
4606 // G_BUILD_VECTOR_TRUNC.
4607 LLT ScalarTy = MRI.getType(Reg);
4608 Register DstReg = MI.getOperand(0).getReg();
4609 LLT DstTy = MRI.getType(DstReg);
4610
4611 if (ScalarTy != DstTy) {
4612 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4613 Builder.buildTrunc(DstReg, Reg);
4614 MI.eraseFromParent();
4615 return;
4616 }
// NOTE(review): no statement is visible for the equal-type path; the line
// that replaces MI's result with Reg appears to be missing from this listing
// - verify against the upstream file.
4618}
4619
4622 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
// Match step: succeeds when every non-debug user of the G_BUILD_VECTOR is a
// constant-index G_EXTRACT_VECTOR_ELT and every element is extracted at least
// once. SrcDstPairs receives (build-vector source register, extract
// instruction) for each user; entries may already have been appended when the
// function returns false.
4623 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4624 // This combine tries to find build_vector's which have every source element
4625 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4626 // the masked load scalarization is run late in the pipeline. There's already
4627 // a combine for a similar pattern starting from the extract, but that
4628 // doesn't attempt to do it if there are multiple uses of the build_vector,
4629 // which in this case is true. Starting the combine from the build_vector
4630 // feels more natural than trying to find sibling nodes of extracts.
4631 // E.g.
4632 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4633 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4634 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4635 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4636 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4637 // ==>
4638 // replace ext{1,2,3,4} with %s{1,2,3,4}
4639
4640 Register DstReg = MI.getOperand(0).getReg();
4641 LLT DstTy = MRI.getType(DstReg);
4642 unsigned NumElts = DstTy.getNumElements();
4643
// One bit per element; set when some user extracts that element.
4644 SmallBitVector ExtractedElts(NumElts);
4645 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4646 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4647 return false;
4648 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4649 if (!Cst)
4650 return false;
// NOTE(review): the index is narrowed to unsigned before the range check; if
// getZExtValue() yields a wider integer, a very large index could wrap into
// range - confirm whether the check should be done on the full value.
4651 unsigned Idx = Cst->getZExtValue();
4652 if (Idx >= NumElts)
4653 return false; // Out of range.
4654 ExtractedElts.set(Idx);
4655 SrcDstPairs.emplace_back(
4656 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4657 }
4658 // Match if every element was extracted.
4659 return ExtractedElts.all();
4660}
4661
4664 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
// Apply step: replace each recorded extract's result with the corresponding
// build-vector source register, erase the extracts, then erase the
// G_BUILD_VECTOR itself.
4665 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4666 for (auto &Pair : SrcDstPairs) {
4667 auto *ExtMI = Pair.second;
4668 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4669 ExtMI->eraseFromParent();
4670 }
4671 MI.eraseFromParent();
4672}
4673
4676 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Runs the stored build callback (via applyBuildFnNoErase) and then erases MI.
4677 applyBuildFnNoErase(MI, MatchInfo);
4678 MI.eraseFromParent();
4679}
4680
4683 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Invokes the stored build callback with this helper's Builder; no
// instruction is erased here.
4684 MatchInfo(Builder);
4685}
4686
4688 bool AllowScalarConstants,
4689 BuildFnTy &MatchInfo) const {
// Match step: recognise (or (shl x, a), (lshr y, b)) where the two shift
// amounts are complementary with respect to the scalar bit width. On success
// MatchInfo builds a G_FSHL or G_FSHR into the G_OR's destination register.
4690 assert(MI.getOpcode() == TargetOpcode::G_OR);
4691
4692 Register Dst = MI.getOperand(0).getReg();
4693 LLT Ty = MRI.getType(Dst);
4694 unsigned BitWidth = Ty.getScalarSizeInBits();
4695
4696 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4697 unsigned FshOpc = 0;
4698
4699 // Match (or (shl ...), (lshr ...)).
4700 if (!mi_match(Dst, MRI,
4701 // m_GOr() handles the commuted version as well.
4702 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4703 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4704 return false;
4705
4706 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4707 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
// CstShlAmt stays 0 unless the constant match succeeds; the legality check
// further down relies on that.
4708 int64_t CstShlAmt = 0, CstLShrAmt;
4709 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4710 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4711 CstShlAmt + CstLShrAmt == BitWidth) {
4712 FshOpc = TargetOpcode::G_FSHR;
4713 Amt = LShrAmt;
// NOTE(review): the pattern argument of the next two mi_match calls is not
// visible in this listing (one line is missing after each); per the comments
// inside the branches they match (sub bw, amt) - verify against the upstream
// file.
4714 } else if (mi_match(LShrAmt, MRI,
4716 ShlAmt == Amt) {
4717 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4718 FshOpc = TargetOpcode::G_FSHL;
4719 } else if (mi_match(ShlAmt, MRI,
4721 LShrAmt == Amt) {
4722 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4723 FshOpc = TargetOpcode::G_FSHR;
4724 } else {
4725 return false;
4726 }
4727
4728 LLT AmtTy = MRI.getType(Amt);
// An illegal funnel shift is still accepted when AllowScalarConstants is set,
// the type is scalar and the constant shl amount is non-zero.
4729 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
4730 (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
4731 return false;
4732
4733 MatchInfo = [=](MachineIRBuilder &B) {
4734 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4735 };
4736 return true;
4737}
4738
4739/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
/// Succeeds only when both shifted inputs (operands 1 and 2) are the same
/// register and the corresponding rotate opcode passes
/// isLegalOrBeforeLegalizer.
// NOTE(review): listing line 4740 (the opening declaration line) is absent
// from this extract.
4741 unsigned Opc = MI.getOpcode();
4742 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4743 Register X = MI.getOperand(1).getReg();
4744 Register Y = MI.getOperand(2).getReg();
4745 if (X != Y)
4746 return false;
// G_FSHL maps to G_ROTL, G_FSHR maps to G_ROTR.
4747 unsigned RotateOpc =
4748 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4749 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4750}
4751
/// Rewrites a G_FSHL / G_FSHR in place into G_ROTL / G_ROTR by swapping the
/// instruction descriptor and dropping operand 2.
// NOTE(review): listing line 4752 (the opening declaration line) is absent
// from this extract.
4753 unsigned Opc = MI.getOpcode();
4754 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4755 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
// The in-place mutation is bracketed by observer notifications.
4756 Observer.changingInstr(MI);
4757 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4758 : TargetOpcode::G_ROTR));
4759 MI.removeOperand(2);
4760 Observer.changedInstr(MI);
4761}
4762
4763// Fold (rot x, c) -> (rot x, c % BitSize)
// Returns true when matchUnaryPredicate accepts the amount register and at
// least one ConstantInt seen by the callback is >= the scalar bit size.
// NOTE(review): listing line 4764 (the opening declaration line) is absent
// from this extract.
4765 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4766 MI.getOpcode() == TargetOpcode::G_ROTR);
4767 unsigned Bitsize =
4768 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4769 Register AmtReg = MI.getOperand(2).getReg();
4770 bool OutOfRange = false;
// The callback never rejects an element; it only records whether any
// ConstantInt amount is out of range.
4771 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4772 if (auto *CI = dyn_cast<ConstantInt>(C))
4773 OutOfRange |= CI->getValue().uge(Bitsize);
4774 return true;
4775 };
4776 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4777}
4778
/// Replaces the rotate amount (operand 2) of a G_ROTL / G_ROTR with
/// `amount urem bitsize`, where bitsize is the scalar size of the result type.
// NOTE(review): listing line 4779 (the opening declaration line) is absent
// from this extract.
4780 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4781 MI.getOpcode() == TargetOpcode::G_ROTR);
4782 unsigned Bitsize =
4783 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4784 Register Amt = MI.getOperand(2).getReg();
4785 LLT AmtTy = MRI.getType(Amt);
// The constant and the G_UREM are built with the amount's own type.
4786 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4787 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4788 Observer.changingInstr(MI);
4789 MI.getOperand(2).setReg(Amt);
4790 Observer.changedInstr(MI);
4791}
4792
/// Tries to decide a G_ICMP from the known bits of its operands. On success
/// MatchInfo receives 0 for a known-false compare, or the value selected on
/// listing lines 4829-4832 for a known-true compare.
// NOTE(review): listing line 4793 (the opening declaration line) and line
// 4829 (start of the known-true expression) are absent from this extract.
4794 int64_t &MatchInfo) const {
4795 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4796 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4797
4798 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4799 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4800 // KnownBits on the LHS in two cases:
4801 //
4802 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4803 // we cannot do any transforms so we can safely bail out early.
4804 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4805 // >=0.
4806 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4807 if (KnownRHS.isUnknown())
4808 return false;
4809
4810 std::optional<bool> KnownVal;
4811 if (KnownRHS.isZero()) {
4812 // ? uge 0 -> always true
4813 // ? ult 0 -> always false
4814 if (Pred == CmpInst::ICMP_UGE)
4815 KnownVal = true;
4816 else if (Pred == CmpInst::ICMP_ULT)
4817 KnownVal = false;
4818 }
4819
// Only query the LHS known bits when the RHS-only shortcut did not decide.
4820 if (!KnownVal) {
4821 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4822 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4823 }
4824
4825 if (!KnownVal)
4826 return false;
4827 MatchInfo =
4828 *KnownVal
4830 /*IsVector = */
4831 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4832 /* IsFP = */ false)
4833 : 0;
4834 return true;
4835}
4836
/// Matches an equality G_ICMP against 0 or 1 whose LHS is known to be 0 or 1
/// and stores a callback that defines the compare's destination directly from
/// the LHS (COPY, G_TRUNC or G_ZEXT depending on the sizes).
// NOTE(review): listing lines 4837-4838 (the start of this definition) and
// line 4857 (start of the true-value check) are absent from this extract.
4839 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4840 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4841 // Given:
4842 //
4843 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4844 // %cmp = G_ICMP ne %x, 0
4845 //
4846 // Or:
4847 //
4848 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4849 // %cmp = G_ICMP eq %x, 1
4850 //
4851 // We can replace %cmp with %x assuming true is 1 on the target.
4852 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4853 if (!CmpInst::isEquality(Pred))
4854 return false;
4855 Register Dst = MI.getOperand(0).getReg();
4856 LLT DstTy = MRI.getType(Dst);
4858 /* IsFP = */ false) != 1)
4859 return false;
// The RHS must be the constant 1 for `eq` and 0 for any other equality
// predicate.
4860 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4861 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4862 return false;
4863 Register LHS = MI.getOperand(2).getReg();
4864 auto KnownLHS = VT->getKnownBits(LHS);
4865 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4866 return false;
4867 // Make sure replacing Dst with the LHS is a legal operation.
4868 LLT LHSTy = MRI.getType(LHS);
4869 unsigned LHSSize = LHSTy.getSizeInBits();
4870 unsigned DstSize = DstTy.getSizeInBits();
// Same size: plain COPY. Narrower destination: G_TRUNC. Wider: G_ZEXT.
4871 unsigned Op = TargetOpcode::COPY;
4872 if (DstSize != LHSSize)
4873 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4874 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4875 return false;
4876 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4877 return true;
4878}
4879
4880// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
// On success MatchInfo holds a callback that rewrites the G_AND's operands in
// place; no new instruction is built by it.
// NOTE(review): listing lines 4881-4882 (the start of this definition) are
// absent from this extract.
4883 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4884 assert(MI.getOpcode() == TargetOpcode::G_AND);
4885
4886 // Ignore vector types to simplify matching the two constants.
4887 // TODO: do this for vectors and scalars via a demanded bits analysis.
4888 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4889 if (Ty.isVector())
4890 return false;
4891
4892 Register Src;
4893 Register AndMaskReg;
4894 int64_t AndMaskBits;
4895 int64_t OrMaskBits;
// m_all_of binds both the constant value and the register holding it.
4896 if (!mi_match(MI, MRI,
4897 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4898 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4899 return false;
4900
4901 // Check if OrMask could turn on any bits in Src.
4902 if (AndMaskBits & OrMaskBits)
4903 return false;
4904
// MI is captured by reference; the callback mutates the matched instruction.
4905 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4906 Observer.changingInstr(MI);
4907 // Canonicalize the result to have the constant on the RHS.
4908 if (MI.getOperand(1).getReg() == AndMaskReg)
4909 MI.getOperand(2).setReg(AndMaskReg);
4910 MI.getOperand(1).setReg(Src);
4911 Observer.changedInstr(MI);
4912 };
4913 return true;
4914}
4915
4916/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
/// The shift may be arithmetic or logical, must have a constant amount and a
/// single non-debug use, and the extracted field must lie inside the scalar.
// NOTE(review): listing lines 4917-4918 (the start of this definition) and
// line 4924 (where ExtractTy is declared) are absent from this extract.
4919 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4920 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4921 Register Dst = MI.getOperand(0).getReg();
4922 Register Src = MI.getOperand(1).getReg();
4923 LLT Ty = MRI.getType(Src);
// Bails out when there is no LegalizerInfo or G_SBFX is not legal/custom.
4925 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4926 return false;
4927 int64_t Width = MI.getOperand(2).getImm();
4928 Register ShiftSrc;
4929 int64_t ShiftImm;
4930 if (!mi_match(
4931 Src, MRI,
4932 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4933 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4934 return false;
// Reject negative shifts and fields that would extend past the top bit.
4935 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4936 return false;
4937
// G_SBFX operands as built here: source, start bit (ShiftImm), width.
4938 MatchInfo = [=](MachineIRBuilder &B) {
4939 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4940 auto Cst2 = B.buildConstant(ExtractTy, Width);
4941 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4942 };
4943 return true;
4944}
4945
4946/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
// NOTE(review): listing line 4947 (the opening declaration line) and line
// 4952 (where ExtractTy is declared) are absent from this extract.
4948 BuildFnTy &MatchInfo) const {
4949 GAnd *And = cast<GAnd>(&MI);
4950 Register Dst = And->getReg(0);
4951 LLT Ty = MRI.getType(Dst);
4953 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4954 // into account.
// A null LI does not cause a bail-out here; only an explicit "not legal or
// custom" answer does.
4955 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4956 return false;
4957
4958 int64_t AndImm, LSBImm;
4959 Register ShiftSrc;
4960 const unsigned Size = Ty.getScalarSizeInBits();
// The logical shift must have a single non-debug use.
4961 if (!mi_match(And->getReg(0), MRI,
4962 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4963 m_ICst(AndImm))))
4964 return false;
4965
4966 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4967 auto MaybeMask = static_cast<uint64_t>(AndImm);
4968 if (MaybeMask & (MaybeMask + 1))
4969 return false;
4970
4971 // LSB must fit within the register.
// The unsigned cast also rejects negative LSBImm values.
4972 if (static_cast<uint64_t>(LSBImm) >= Size)
4973 return false;
4974
// Field width = number of trailing one bits in the mask.
4975 uint64_t Width = APInt(Size, AndImm).countr_one();
4976 MatchInfo = [=](MachineIRBuilder &B) {
4977 auto WidthCst = B.buildConstant(ExtractTy, Width);
4978 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4979 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4980 };
4981 return true;
4982}
4983
/// Matches `shr (shl x, c1), c2` (G_ASHR or G_LSHR) and stores a callback that
/// builds a G_SBFX (for G_ASHR) or G_UBFX (for G_LSHR) of x instead.
// NOTE(review): listing lines 4984-4985 (the start of this definition) and
// line 4998 (where ExtractTy is declared) are absent from this extract.
4986 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4987 const unsigned Opcode = MI.getOpcode();
4988 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4989
4990 const Register Dst = MI.getOperand(0).getReg();
4991
4992 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4993 ? TargetOpcode::G_SBFX
4994 : TargetOpcode::G_UBFX;
4995
4996 // Check if the type we would use for the extract is legal
4997 LLT Ty = MRI.getType(Dst);
4999 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
5000 return false;
5001
5002 Register ShlSrc;
5003 int64_t ShrAmt;
5004 int64_t ShlAmt;
5005 const unsigned Size = Ty.getScalarSizeInBits();
5006
5007 // Try to match shr (shl x, c1), c2
5008 if (!mi_match(Dst, MRI,
5009 m_BinOp(Opcode,
5010 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
5011 m_ICst(ShrAmt))))
5012 return false;
5013
5014 // Make sure that the shift sizes can fit a bitfield extract
// Requires 0 <= ShlAmt <= ShrAmt < Size.
5015 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
5016 return false;
5017
5018 // Skip this combine if the G_SEXT_INREG combine could handle it
5019 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
5020 return false;
5021
5022 // Calculate start position and width of the extract
5023 const int64_t Pos = ShrAmt - ShlAmt;
5024 const int64_t Width = Size - ShrAmt;
5025
// Extract operands as built here: source, position, width.
5026 MatchInfo = [=](MachineIRBuilder &B) {
5027 auto WidthCst = B.buildConstant(ExtractTy, Width);
5028 auto PosCst = B.buildConstant(ExtractTy, Pos);
5029 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
5030 };
5031 return true;
5032}
5033
/// Matches `shr (and x, c1), c2` (G_LSHR or G_ASHR) and stores a callback that
/// builds either a constant 0 (when the shift discards every masked bit) or a
/// G_UBFX of x.
// NOTE(review): listing lines 5034-5035 (the start of this definition), line
// 5042 (where ExtractTy is declared) and line 5071 are absent from this
// extract.
5036 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5037 const unsigned Opcode = MI.getOpcode();
5038 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
5039
5040 const Register Dst = MI.getOperand(0).getReg();
5041 LLT Ty = MRI.getType(Dst);
// A null LI does not cause a bail-out here.
5043 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
5044 return false;
5045
5046 // Try to match shr (and x, c1), c2
5047 Register AndSrc;
5048 int64_t ShrAmt;
5049 int64_t SMask;
5050 if (!mi_match(Dst, MRI,
5051 m_BinOp(Opcode,
5052 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
5053 m_ICst(ShrAmt))))
5054 return false;
5055
5056 const unsigned Size = Ty.getScalarSizeInBits();
5057 if (ShrAmt < 0 || ShrAmt >= Size)
5058 return false;
5059
5060 // If the shift subsumes the mask, emit the 0 directly.
5061 if (0 == (SMask >> ShrAmt)) {
5062 MatchInfo = [=](MachineIRBuilder &B) {
5063 B.buildConstant(Dst, 0);
5064 };
5065 return true;
5066 }
5067
5068 // Check that ubfx can do the extraction, with no holes in the mask.
// The bits below ShrAmt are shifted out anyway, so they are filled with ones
// before testing for a contiguous low mask.
5069 uint64_t UMask = SMask;
5070 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
5072 if (!isMask_64(UMask))
5073 return false;
5074
5075 // Calculate start position and width of the extract.
5076 const int64_t Pos = ShrAmt;
5077 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
5078
5079 // It's preferable to keep the shift, rather than form G_SBFX.
5080 // TODO: remove the G_AND via demanded bits analysis.
5081 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
5082 return false;
5083
5084 MatchInfo = [=](MachineIRBuilder &B) {
5085 auto WidthCst = B.buildConstant(ExtractTy, Width);
5086 auto PosCst = B.buildConstant(ExtractTy, Pos);
5087 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
5088 };
5089 return true;
5090}
5091
5092bool CombinerHelper::reassociationCanBreakAddressingModePattern(
5093 MachineInstr &MI) const {
5094 auto &PtrAdd = cast<GPtrAdd>(MI);
5095
5096 Register Src1Reg = PtrAdd.getBaseReg();
5097 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
5098 if (!Src1Def)
5099 return false;
5100
5101 Register Src2Reg = PtrAdd.getOffsetReg();
5102
5103 if (MRI.hasOneNonDBGUse(Src1Reg))
5104 return false;
5105
5106 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
5107 if (!C1)
5108 return false;
5109 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5110 if (!C2)
5111 return false;
5112
5113 const APInt &C1APIntVal = *C1;
5114 const APInt &C2APIntVal = *C2;
5115 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
5116
5117 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
5118 // This combine may end up running before ptrtoint/inttoptr combines
5119 // manage to eliminate redundant conversions, so try to look through them.
5120 MachineInstr *ConvUseMI = &UseMI;
5121 unsigned ConvUseOpc = ConvUseMI->getOpcode();
5122 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
5123 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
5124 Register DefReg = ConvUseMI->getOperand(0).getReg();
5125 if (!MRI.hasOneNonDBGUse(DefReg))
5126 break;
5127 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
5128 ConvUseOpc = ConvUseMI->getOpcode();
5129 }
5130 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
5131 if (!LdStMI)
5132 continue;
5133 // Is x[offset2] already not a legal addressing mode? If so then
5134 // reassociating the constants breaks nothing (we test offset2 because
5135 // that's the one we hope to fold into the load or store).
5136 TargetLoweringBase::AddrMode AM;
5137 AM.HasBaseReg = true;
5138 AM.BaseOffs = C2APIntVal.getSExtValue();
5139 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
5140 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
5141 PtrAdd.getMF()->getFunction().getContext());
5142 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
5143 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5144 AccessTy, AS))
5145 continue;
5146
5147 // Would x[offset1+offset2] still be a legal addressing mode?
5148 AM.BaseOffs = CombinedValue;
5149 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5150 AccessTy, AS))
5151 return true;
5152 }
5153
5154 return false;
5155}
5156
/// Matches a G_PTR_ADD whose offset is `G_ADD(X, C)` with constant C and
/// stores a callback that splits it into an inner G_PTR_ADD(BASE, X) followed
/// by the original instruction re-pointed at that result with offset C.
// NOTE(review): listing line 5157 (the opening declaration line) and the
// statements under the three `if` lines at 5180/5182/5184 (listing lines
// 5181, 5183, 5185) are absent from this extract.
5158 MachineInstr *RHS,
5159 BuildFnTy &MatchInfo) const {
5160 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5161 Register Src1Reg = MI.getOperand(1).getReg();
5162 if (RHS->getOpcode() != TargetOpcode::G_ADD)
5163 return false;
5164 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
5165 if (!C2)
5166 return false;
5167
5168 // If both additions are nuw, the reassociated additions are also nuw.
5169 // If the original G_PTR_ADD is additionally nusw, X and C are both not
5170 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
5171 // therefore also nusw.
5172 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
5173 // the new G_PTR_ADDs are then also inbounds.
5174 unsigned PtrAddFlags = MI.getFlags();
5175 unsigned AddFlags = RHS->getFlags();
5176 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
5177 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
5178 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
5179 unsigned Flags = 0;
5180 if (IsNoUWrap)
5182 if (IsNoUSWrap)
5184 if (IsInBounds)
5186
// NOTE(review): the callback's B parameter is unused; the new G_PTR_ADD is
// built through the member Builder instead.
5187 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5188 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
5189
5190 auto NewBase =
5191 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
5192 Observer.changingInstr(MI);
5193 MI.getOperand(1).setReg(NewBase.getReg(0));
5194 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
5195 MI.setFlags(Flags);
5196 Observer.changedInstr(MI);
5197 };
// MatchInfo is assigned even when this returns false.
5198 return !reassociationCanBreakAddressingModePattern(MI);
5199}
5200
/// Matches a G_PTR_ADD whose base is a single-use `G_PTR_ADD(X, C)` with
/// constant C and stores a callback that swaps the two offsets so the constant
/// ends up on the outer instruction.
// NOTE(review): listing line 5201 (the opening declaration line) and listing
// lines 5222, 5224, 5227, 5229 and 5231 (flag expressions/statements) are
// absent from this extract.
5202 MachineInstr *LHS,
5203 MachineInstr *RHS,
5204 BuildFnTy &MatchInfo) const {
5205 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
5206 // if and only if (G_PTR_ADD X, C) has one use.
5207 Register LHSBase;
5208 std::optional<ValueAndVReg> LHSCstOff;
5209 if (!mi_match(MI.getBaseReg(), MRI,
5210 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
5211 return false;
5212
// The pattern above already established that the base is a G_PTR_ADD, so an
// unchecked cast is used here.
5213 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5214
5215 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5216 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5217 // so the new G_PTR_ADDs are also inbounds.
5218 unsigned PtrAddFlags = MI.getFlags();
5219 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5220 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5221 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5223 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5225 unsigned Flags = 0;
5226 if (IsNoUWrap)
5228 if (IsNoUSWrap)
5230 if (IsInBounds)
5232
5233 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5234 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5235 // before its def. Sink the instruction so the outer PTR_ADD to ensure this
5236 // doesn't happen.
5237 LHSPtrAdd->moveBefore(&MI);
5238 Register RHSReg = MI.getOffsetReg();
5239 // set VReg will cause type mismatch if it comes from extend/trunc
// A fresh constant is built with the type of the outer offset register.
5240 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
5241 Observer.changingInstr(MI);
5242 MI.getOperand(2).setReg(NewCst.getReg(0));
5243 MI.setFlags(Flags);
5244 Observer.changedInstr(MI);
5245 Observer.changingInstr(*LHSPtrAdd);
5246 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5247 LHSPtrAdd->setFlags(Flags);
5248 Observer.changedInstr(*LHSPtrAdd);
5249 };
// MatchInfo is assigned even when this returns false.
5250 return !reassociationCanBreakAddressingModePattern(MI);
5251}
5252
/// Matches two chained G_PTR_ADDs with constant offsets and stores a callback
/// that rewrites the outer one to add the summed constant to the inner base.
// NOTE(review): listing line 5253 (the opening declaration line) and listing
// lines 5283, 5285 and 5286 (flag statements) are absent from this extract.
5254 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
5255 BuildFnTy &MatchInfo) const {
5256 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5257 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5258 if (!LHSPtrAdd)
5259 return false;
5260
5261 Register Src2Reg = MI.getOperand(2).getReg();
5262 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5263 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5264 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5265 if (!C1)
5266 return false;
5267 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5268 if (!C2)
5269 return false;
5270
5271 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5272 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5273 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5274 // largest signed integer that fits into the index type, which is the maximum
5275 // size of allocated objects according to the IR Language Reference.
5276 unsigned PtrAddFlags = MI.getFlags();
5277 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5278 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5279 bool IsInBounds =
5280 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5281 unsigned Flags = 0;
5282 if (IsNoUWrap)
5284 if (IsInBounds) {
5287 }
5288
// The folded constant takes the type of the outer offset register.
5289 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5290 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5291 Observer.changingInstr(MI);
5292 MI.getOperand(1).setReg(LHSSrc1);
5293 MI.getOperand(2).setReg(NewCst.getReg(0));
5294 MI.setFlags(Flags);
5295 Observer.changedInstr(MI);
5296 };
// MatchInfo is assigned even when this returns false.
5297 return !reassociationCanBreakAddressingModePattern(MI);
5298}
5299
/// Entry point for G_PTR_ADD reassociation: tries the three sub-matchers in
/// the order 2, 3, 1 (as numbered in the comment below) and returns true on
/// the first that succeeds.
// NOTE(review): listing line 5300 (the opening declaration line) is absent
// from this extract.
5301 BuildFnTy &MatchInfo) const {
5302 auto &PtrAdd = cast<GPtrAdd>(MI);
5303 // We're trying to match a few pointer computation patterns here for
5304 // re-association opportunities.
5305 // 1) Isolating a constant operand to be on the RHS, e.g.:
5306 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5307 //
5308 // 2) Folding two constants in each sub-tree as long as such folding
5309 // doesn't break a legal addressing mode.
5310 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5311 //
5312 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5313 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
5314 // iif (G_PTR_ADD X, C) has one use.
// LHS / RHS are the defining instructions of the base and offset registers.
5315 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5316 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5317
5318 // Try to match example 2.
5319 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5320 return true;
5321
5322 // Try to match example 3.
5323 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5324 return true;
5325
5326 // Try to match example 1.
5327 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5328 return true;
5329
5330 return false;
5331}
/// Tries to reassociate `Opc(Opc(X, C1), OpRHS)` where OpLHS is defined by the
/// inner `Opc` and C1 is a constant or constant splat. On success MatchInfo
/// receives a callback that builds the reassociated pair into DstReg.
// NOTE(review): listing line 5332 (the opening declaration line) is absent
// from this extract.
5333 Register OpLHS, Register OpRHS,
5334 BuildFnTy &MatchInfo) const {
5335 LLT OpRHSTy = MRI.getType(OpRHS);
// NOTE(review): the defining instruction is dereferenced without a null
// check — assumes OpLHS always has a def; confirm with callers.
5336 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5337
5338 if (OpLHSDef->getOpcode() != Opc)
5339 return false;
5340
5341 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5342 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5343 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5344
5345 // If the inner op is (X op C), pull the constant out so it can be folded with
5346 // other constants in the expression tree. Folding is not guaranteed so we
5347 // might have (C1 op C2). In that case do not pull a constant out because it
5348 // won't help and can lead to infinite loops.
5349 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5350 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5351 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5352 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5353 MatchInfo = [=](MachineIRBuilder &B) {
5354 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5355 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5356 };
5357 return true;
5358 }
// Otherwise defer to the target hook to decide whether moving the constant
// outwards is worthwhile.
5359 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5360 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5361 // iff (op x, c1) has one use
5362 MatchInfo = [=](MachineIRBuilder &B) {
5363 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5364 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5365 };
5366 return true;
5367 }
5368 }
5369
5370 return false;
5371}
5372
/// Tries tryReassocBinOp on the instruction's two source operands, first as
/// (LHS, RHS) and then swapped as (RHS, LHS).
// NOTE(review): listing line 5373 (the opening declaration line) is absent
// from this extract.
5374 BuildFnTy &MatchInfo) const {
5375 // We don't check if the reassociation will break a legal addressing mode
5376 // here since pointer arithmetic is handled by G_PTR_ADD.
5377 unsigned Opc = MI.getOpcode();
5378 Register DstReg = MI.getOperand(0).getReg();
5379 Register LHSReg = MI.getOperand(1).getReg();
5380 Register RHSReg = MI.getOperand(2).getReg();
5381
5382 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5383 return true;
// Second attempt with the operands exchanged.
5384 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5385 return true;
5386 return false;
5387}
5388
/// Asks ConstantFoldCastOp to fold MI (using its opcode, destination type and
/// operand 1); when a value comes back it is stored in MatchInfo.
// NOTE(review): listing line 5389 (the opening declaration line) is absent
// from this extract.
5390 APInt &MatchInfo) const {
5391 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5392 Register SrcOp = MI.getOperand(1).getReg();
5393
5394 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5395 MatchInfo = *MaybeCst;
5396 return true;
5397 }
5398
5399 return false;
5400}
5401
/// Asks ConstantFoldUnaryIntOp to fold MI; when it returns a non-empty list
/// of constants, stores a callback that materialises them into MI's
/// destination register.
// NOTE(review): listing line 5402 (the opening declaration line) is absent
// from this extract.
5403 BuildFnTy &MatchInfo) const {
5404 Register Dst = MI.getOperand(0).getReg();
5405 auto Csts = ConstantFoldUnaryIntOp(MI.getOpcode(), MRI.getType(Dst),
5406 MI.getOperand(1).getReg(), MRI);
5407 if (Csts.empty())
5408 return false;
5409
// The constants are moved into the callback. One element yields a single
// constant; more than one yields a build-vector constant.
5410 MatchInfo = [Dst, Csts = std::move(Csts)](MachineIRBuilder &B) {
5411 if (Csts.size() == 1)
5412 B.buildConstant(Dst, Csts[0]);
5413 else
5414 B.buildBuildVectorConstant(Dst, Csts);
5415 };
5416 return true;
5417}
5418
/// Asks ConstantFoldBinOp to fold MI from operands 1 and 2; when a value comes
/// back it is stored in MatchInfo.
// NOTE(review): listing line 5419 (the opening declaration line) is absent
// from this extract.
5420 APInt &MatchInfo) const {
5421 Register Op1 = MI.getOperand(1).getReg();
5422 Register Op2 = MI.getOperand(2).getReg();
5423 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5424 if (!MaybeCst)
5425 return false;
5426 MatchInfo = *MaybeCst;
5427 return true;
5428}
5429
/// Asks ConstantFoldFPBinOp to fold MI from operands 1 and 2; when a value
/// comes back, MatchInfo is set to a ConstantFP created in the function's
/// LLVMContext.
// NOTE(review): listing line 5430 (the opening declaration line) is absent
// from this extract.
5431 ConstantFP *&MatchInfo) const {
5432 Register Op1 = MI.getOperand(1).getReg();
5433 Register Op2 = MI.getOperand(2).getReg();
5434 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5435 if (!MaybeCst)
5436 return false;
5437 MatchInfo =
5438 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5439 return true;
5440}
5441
/// Folds a G_FMA / G_FMAD whose three source operands are all FP constants
/// into a single ConstantFP stored in MatchInfo.
// NOTE(review): listing line 5442 (the opening declaration line) and line
// 5462 (the remaining argument of fusedMultiplyAdd) are absent from this
// extract.
5443 ConstantFP *&MatchInfo) const {
5444 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5445 MI.getOpcode() == TargetOpcode::G_FMAD);
5446 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5447
// The operands are checked in the order Op3, Op2, Op1; any non-constant one
// aborts the match.
5448 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5449 if (!Op3Cst)
5450 return false;
5451
5452 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5453 if (!Op2Cst)
5454 return false;
5455
5456 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5457 if (!Op1Cst)
5458 return false;
5459
// Work on a copy of Op1's value; fusedMultiplyAdd updates it in place.
5460 APFloat Op1F = Op1Cst->getValueAPF();
5461 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5463 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5464 return true;
5465}
5466
/// Matches a scalar G_AND with a low-bit mask whose LHS is a single-use
/// G_ADD/G_SUB/G_MUL/G_AND/G_OR/G_XOR and stores a callback that performs the
/// binop at the mask's width and zero-extends the result back.
// NOTE(review): listing lines 5467-5468 (the start of this definition) are
// absent from this extract.
5469 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5470 // Look for a binop feeding into an AND with a mask:
5471 //
5472 // %add = G_ADD %lhs, %rhs
5473 // %and = G_AND %add, 000...11111111
5474 //
5475 // Check if it's possible to perform the binop at a narrower width and zext
5476 // back to the original width like so:
5477 //
5478 // %narrow_lhs = G_TRUNC %lhs
5479 // %narrow_rhs = G_TRUNC %rhs
5480 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5481 // %new_add = G_ZEXT %narrow_add
5482 // %and = G_AND %new_add, 000...11111111
5483 //
5484 // This can allow later combines to eliminate the G_AND if it turns out
5485 // that the mask is irrelevant.
5486 assert(MI.getOpcode() == TargetOpcode::G_AND);
5487 Register Dst = MI.getOperand(0).getReg();
5488 Register AndLHS = MI.getOperand(1).getReg();
5489 Register AndRHS = MI.getOperand(2).getReg();
5490 LLT WideTy = MRI.getType(Dst);
5491
5492 // If the potential binop has more than one use, then it's possible that one
5493 // of those uses will need its full width.
5494 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5495 return false;
5496
5497 // Check if the LHS feeding the AND is impacted by the high bits that we're
5498 // masking out.
5499 //
5500 // e.g. for 64-bit x, y:
5501 //
5502 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5503 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5504 if (!LHSInst)
5505 return false;
5506 unsigned LHSOpc = LHSInst->getOpcode();
// Only the opcodes listed below are accepted; anything else bails out.
5507 switch (LHSOpc) {
5508 default:
5509 return false;
5510 case TargetOpcode::G_ADD:
5511 case TargetOpcode::G_SUB:
5512 case TargetOpcode::G_MUL:
5513 case TargetOpcode::G_AND:
5514 case TargetOpcode::G_OR:
5515 case TargetOpcode::G_XOR:
5516 break;
5517 }
5518
5519 // Find the mask on the RHS.
5520 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5521 if (!Cst)
5522 return false;
5523 auto Mask = Cst->Value;
5524 if (!Mask.isMask())
5525 return false;
5526
5527 // No point in combining if there's nothing to truncate.
5528 unsigned NarrowWidth = Mask.countr_one();
5529 if (NarrowWidth == WideTy.getSizeInBits())
5530 return false;
5531 LLT NarrowTy = LLT::integer(NarrowWidth);
5532
5533 // Check if adding the zext + truncates could be harmful.
5534 auto &MF = *MI.getMF();
5535 const auto &TLI = getTargetLowering();
5536 LLVMContext &Ctx = MF.getFunction().getContext();
5537 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5538 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5539 return false;
5540 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5541 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5542 return false;
5543 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5544 Register BinOpRHS = LHSInst->getOperand(2).getReg();
// NOTE(review): the callback's B parameter is unused; all instructions are
// built through the member Builder. Only operand 1 of the G_AND is replaced.
5545 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5546 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5547 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5548 auto NarrowBinOp =
5549 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5550 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5551 Observer.changingInstr(MI);
5552 MI.getOperand(1).setReg(Ext.getReg(0));
5553 Observer.changedInstr(MI);
5554 };
5555 return true;
5556}
5557
/// Matches G_UMULO / G_SMULO whose operand 3 is the constant (or splat) 2 and
/// stores a callback that turns MI in place into G_UADDO / G_SADDO of operand
/// 2 with itself.
// NOTE(review): listing line 5558 (the opening declaration line) is absent
// from this extract.
5559 BuildFnTy &MatchInfo) const {
5560 unsigned Opc = MI.getOpcode();
5561 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5562
5563 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5564 return false;
5565
5566 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5567 Observer.changingInstr(MI);
5568 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5569 : TargetOpcode::G_SADDO;
5570 MI.setDesc(Builder.getTII().get(NewOpc));
// Replace the constant operand with a second copy of operand 2.
5571 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5572 Observer.changedInstr(MI);
5573 };
5574 return true;
5575}
5576
/// Matches G_UMULO / G_SMULO whose operand 3 is the constant (or splat) 0 and
/// stores a callback that defines both results (operands 0 and 1) as the
/// constant 0.
// NOTE(review): listing line 5577 (the opening declaration line) is absent
// from this extract.
5578 BuildFnTy &MatchInfo) const {
5579 // (G_*MULO x, 0) -> 0 + no carry out
5580 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5581 MI.getOpcode() == TargetOpcode::G_SMULO);
5582 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5583 return false;
5584 Register Dst = MI.getOperand(0).getReg();
5585 Register Carry = MI.getOperand(1).getReg();
// Both result types must be able to hold a constant legally.
5586 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5587 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5588 return false;
5589 MatchInfo = [=](MachineIRBuilder &B) {
5590 B.buildConstant(Dst, 0);
5591 B.buildConstant(Carry, 0);
5592 };
5593 return true;
5594}
5595
/// Matches a G_*ADDE / G_*SUBE whose operand 4 is the constant (or splat) 0
/// and stores a callback that converts MI in place to the matching
/// G_*ADDO / G_*SUBO by swapping the descriptor and removing operand 4.
// NOTE(review): listing line 5596 (the opening declaration line) is absent
// from this extract.
5597 BuildFnTy &MatchInfo) const {
5598 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5599 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5600 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5601 MI.getOpcode() == TargetOpcode::G_SADDE ||
5602 MI.getOpcode() == TargetOpcode::G_USUBE ||
5603 MI.getOpcode() == TargetOpcode::G_SSUBE);
5604 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5605 return false;
// NOTE(review): this callback captures by reference ([&]), and the switch
// has no default case, so NewOpcode is only assigned for the four opcodes the
// assert above allows.
5606 MatchInfo = [&](MachineIRBuilder &B) {
5607 unsigned NewOpcode;
5608 switch (MI.getOpcode()) {
5609 case TargetOpcode::G_UADDE:
5610 NewOpcode = TargetOpcode::G_UADDO;
5611 break;
5612 case TargetOpcode::G_SADDE:
5613 NewOpcode = TargetOpcode::G_SADDO;
5614 break;
5615 case TargetOpcode::G_USUBE:
5616 NewOpcode = TargetOpcode::G_USUBO;
5617 break;
5618 case TargetOpcode::G_SSUBE:
5619 NewOpcode = TargetOpcode::G_SSUBO;
5620 break;
5621 }
5622 Observer.changingInstr(MI);
5623 MI.setDesc(B.getTII().get(NewOpcode));
5624 MI.removeOperand(4);
5625 Observer.changedInstr(MI);
5626 };
5627 return true;
5628}
5629
/// Matches a G_SUB in which one operand is a G_ADD that shares a term with the
/// other operand, and stores a callback that builds the simplified result
/// (a COPY of the surviving term, or `0 - term`).
// NOTE(review): listing line 5630 (the opening declaration line) and listing
// lines 5641, 5644, 5658 and 5661 (the second half of each constant
// comparison) are absent from this extract.
5631 BuildFnTy &MatchInfo) const {
5632 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5633 Register Dst = MI.getOperand(0).getReg();
5634 // (x + y) - z -> x (if y == z)
5635 // (x + y) - z -> y (if x == z)
5636 Register X, Y, Z;
5637 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
// ReplaceReg stays invalid unless one of the two cases below applies.
5638 Register ReplaceReg;
5639 int64_t CstX, CstY;
5640 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5642 ReplaceReg = X;
5643 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5645 ReplaceReg = Y;
5646 if (ReplaceReg) {
5647 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5648 return true;
5649 }
5650 }
5651
5652 // x - (y + z) -> 0 - y (if x == z)
5653 // x - (y + z) -> 0 - z (if x == y)
5654 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5655 Register ReplaceReg;
5656 int64_t CstX;
5657 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5659 ReplaceReg = Y;
5660 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5662 ReplaceReg = Z;
5663 if (ReplaceReg) {
// The negation is expressed as a subtraction from a zero constant of the
// destination type.
5664 MatchInfo = [=](MachineIRBuilder &B) {
5665 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5666 B.buildSub(Dst, Zero, ReplaceReg);
5667 };
5668 return true;
5669 }
5670 }
5671 return false;
5672}
5673
// Expands an unsigned division or remainder by a constant (scalar or
// build-vector) divisor into shifts and multiplies emitted through the
// member Builder. Returns the instruction computing the quotient for G_UDIV
// or the remainder for G_UREM. MI itself is not erased or rewritten here.
5675 unsigned Opcode = MI.getOpcode();
5676 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5677 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5678 Register Dst = UDivorRem.getReg(0);
5679 Register LHS = UDivorRem.getReg(1);
5680 Register RHS = UDivorRem.getReg(2);
5681 LLT Ty = MRI.getType(Dst);
5682 LLT ScalarTy = Ty.getScalarType();
5683 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
// NOTE(review): ShiftAmtTy is declared on a line missing from this listing;
// presumably it is the shift-amount type chosen for Ty -- confirm.
5685 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5686
5687 auto &MIB = Builder;
5688
// UseSRL becomes true as soon as any divisor element has trailing zero bits.
5689 bool UseSRL = false;
5690 SmallVector<Register, 16> Shifts, Factors;
5691 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5692 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5693
// Per-element callback for the exact-division path: records the number of
// trailing zero bits of the divisor as a shift amount and the multiplicative
// inverse of the remaining odd part as a factor. Always returns true.
5694 auto BuildExactUDIVPattern = [&](const Constant *C) {
5695 // Don't recompute inverses for each splat element.
5696 if (IsSplat && !Factors.empty()) {
5697 Shifts.push_back(Shifts[0]);
5698 Factors.push_back(Factors[0]);
5699 return true;
5700 }
5701
5702 auto *CI = cast<ConstantInt>(C);
5703 APInt Divisor = CI->getValue();
5704 unsigned Shift = Divisor.countr_zero();
5705 if (Shift) {
5706 Divisor.lshrInPlace(Shift);
5707 UseSRL = true;
5708 }
5709
5710 // Calculate the multiplicative inverse modulo 2^BW.
5711 APInt Factor = Divisor.multiplicativeInverse();
5712 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5713 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5714 return true;
5715 };
5716
// Exact division: logically shift out the divisor's trailing zeros (only if
// some element needs it), then multiply by the inverse of the odd part.
5717 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5718 // Collect all magic values from the build vector.
5719 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5720 llvm_unreachable("Expected unary predicate match to succeed");
5721
5722 Register Shift, Factor;
5723 if (Ty.isVector()) {
5724 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5725 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5726 } else {
5727 Shift = Shifts[0];
5728 Factor = Factors[0];
5729 }
5730
5731 Register Res = LHS;
5732
5733 if (UseSRL)
5734 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5735
5736 return MIB.buildMul(Ty, Res, Factor);
5737 }
5738
// VT may be null; in that case no leading zeros of LHS are assumed known.
5739 unsigned KnownLeadingZeros =
5740 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5741
// UseNPQ becomes true if any element's magic computation reports IsAdd.
5742 bool UseNPQ = false;
5743 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
// Per-element callback for the general path: records the pre-shift, magic
// multiplier, NPQ fix-up factor and post-shift for one divisor element.
// Always returns true.
5744 auto BuildUDIVPattern = [&](const Constant *C) {
5745 auto *CI = cast<ConstantInt>(C);
5746 const APInt &Divisor = CI->getValue();
5747
5748 bool SelNPQ = false;
5749 APInt Magic(Divisor.getBitWidth(), 0);
5750 unsigned PreShift = 0, PostShift = 0;
5751
5752 // Magic algorithm doesn't work for division by 1. We need to emit a select
5753 // at the end.
5754 // TODO: Use undef values for divisor of 1.
5755 if (!Divisor.isOne()) {
5756
// NOTE(review): the declaration of `magics` (the start of the statement that
// ends on listing line 5761) is missing from this listing.
5757 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5758 // in the dividend exceeds the leading zeros for the divisor.
5761 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5762
5763 Magic = std::move(magics.Magic);
5764
5765 assert(magics.PreShift < Divisor.getBitWidth() &&
5766 "We shouldn't generate an undefined shift!");
5767 assert(magics.PostShift < Divisor.getBitWidth() &&
5768 "We shouldn't generate an undefined shift!");
5769 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5770 PreShift = magics.PreShift;
5771 PostShift = magics.PostShift;
5772 SelNPQ = magics.IsAdd;
5773 }
5774
5775 PreShifts.push_back(
5776 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5777 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
// The NPQ factor is either the top bit alone (so the later G_UMULH halves
// the value) or zero (so the later G_UMULH yields zero for this element).
5778 NPQFactors.push_back(
5779 MIB.buildConstant(ScalarTy,
5780 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5781 : APInt::getZero(EltBits))
5782 .getReg(0));
5783 PostShifts.push_back(
5784 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5785 UseNPQ |= SelNPQ;
5786 return true;
5787 };
5788
5789 // Collect the shifts/magic values from each element.
5790 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5791 (void)Matched;
5792 assert(Matched && "Expected unary predicate match to succeed");
5793
5794 Register PreShift, PostShift, MagicFactor, NPQFactor;
5795 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5796 if (RHSDef) {
5797 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5798 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5799 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5800 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5801 } else {
// Scalar divisor: NPQFactor is left unset because the scalar NPQ path below
// uses a shift by one instead of a G_UMULH.
5802 assert(MRI.getType(RHS).isScalar() &&
5803 "Non-build_vector operation should have been a scalar");
5804 PreShift = PreShifts[0];
5805 MagicFactor = MagicFactors[0];
5806 PostShift = PostShifts[0];
5807 }
5808
5809 Register Q = LHS;
5810 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5811
5812 // Multiply the numerator (operand 0) by the magic value.
5813 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5814
5815 if (UseNPQ) {
5816 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5817
5818 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5819 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5820 if (Ty.isVector())
5821 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5822 else
5823 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5824
5825 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5826 }
5827
5828 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
// Division by one bypasses the magic sequence: where the compare against
// One holds, LHS itself is selected as the quotient.
// NOTE(review): the compare predicate is on a line missing from this
// listing; presumably an equality compare -- confirm.
5829 auto One = MIB.buildConstant(Ty, 1);
5830 auto IsOne = MIB.buildICmp(
5832 Ty.isScalar() ? LLT::integer(1) : Ty.changeElementType(LLT::integer(1)),
5833 RHS, One);
5834 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5835
// Remainder is derived from the quotient: LHS - quotient * RHS.
5836 if (Opcode == TargetOpcode::G_UREM) {
5837 auto Prod = MIB.buildMul(Ty, ret, RHS);
5838 return MIB.buildSub(Ty, LHS, Prod);
5839 }
5840 return ret;
5841}
5842
// Decides whether a G_UDIV / G_UREM should be expanded into a
// multiply-based sequence. Returns false for 1-bit element types, when the
// target reports integer division as cheap, for minsize functions, when the
// divisor is not a constant or constant vector, or when a required
// operation fails the legality queries below. Otherwise returns whether
// every divisor element is a non-null constant.
5844 unsigned Opcode = MI.getOpcode();
5845 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5846 Register Dst = MI.getOperand(0).getReg();
5847 Register RHS = MI.getOperand(2).getReg();
5848 LLT DstTy = MRI.getType(Dst);
5849
5850 auto &MF = *MI.getMF();
5851 AttributeList Attr = MF.getFunction().getAttributes();
5852 const auto &TLI = getTargetLowering();
5853 LLVMContext &Ctx = MF.getFunction().getContext();
5854 if (DstTy.getScalarSizeInBits() == 1 ||
5855 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5856 return false;
5857
5858 // Don't do this for minsize because the instruction sequence is usually
5859 // larger.
5860 if (MF.getFunction().hasMinSize())
5861 return false;
5862
// NOTE(review): the second half of this condition is on a line missing from
// this listing. The early return below skips the legality checks further
// down.
5863 if (Opcode == TargetOpcode::G_UDIV &&
5865 return matchUnaryPredicate(
5866 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5867 }
5868
5869 auto *RHSDef = MRI.getVRegDef(RHS);
5870 if (!isConstantOrConstantVector(*RHSDef, MRI))
5871 return false;
5872
5873 // Don't do this if the types are not going to be legal.
5874 if (LI) {
5875 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5876 return false;
5877 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5878 return false;
// NOTE(review): the head of the G_ICMP legality query is on a line missing
// from this listing.
5880 {TargetOpcode::G_ICMP,
5881 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5882 DstTy}}))
5883 return false;
// The remainder form additionally needs a G_SUB.
5884 if (Opcode == TargetOpcode::G_UREM &&
5885 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5886 return false;
5887 }
5888
// A null pointer or a zero divisor element rejects the match.
5889 return matchUnaryPredicate(
5890 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5891}
5892
// Builds the multiply-based replacement for MI and replaces MI's single
// result with the result register (operand 0) of the returned instruction.
5894 auto *NewMI = buildUDivOrURemUsingMul(MI);
5895 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5896}
5897
// Decides whether a G_SDIV / G_SREM should be expanded into a
// multiply-based sequence. Returns false for element types narrower than 3
// bits, when the target reports integer division as cheap, for minsize
// functions, when the divisor is not a constant or constant vector, or when
// a required operation fails the legality queries below. Otherwise returns
// whether every divisor element is a non-null constant.
5899 unsigned Opcode = MI.getOpcode();
5900 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5901 Register Dst = MI.getOperand(0).getReg();
5902 Register RHS = MI.getOperand(2).getReg();
5903 LLT DstTy = MRI.getType(Dst);
5904 auto SizeInBits = DstTy.getScalarSizeInBits();
// WideTy has twice the element width; it is used only by the G_MUL fallback
// legality query below.
5905 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5906
5907 auto &MF = *MI.getMF();
5908 AttributeList Attr = MF.getFunction().getAttributes();
5909 const auto &TLI = getTargetLowering();
5910 LLVMContext &Ctx = MF.getFunction().getContext();
5911 if (DstTy.getScalarSizeInBits() < 3 ||
5912 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5913 return false;
5914
5915 // Don't do this for minsize because the instruction sequence is usually
5916 // larger.
5917 if (MF.getFunction().hasMinSize())
5918 return false;
5919
5920 // If the sdiv has an 'exact' flag we can use a simpler lowering.
// NOTE(review): the second half of this condition is on a line missing from
// this listing. The early return below skips the legality checks further
// down.
5921 if (Opcode == TargetOpcode::G_SDIV &&
5923 return matchUnaryPredicate(
5924 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5925 }
5926
5927 auto *RHSDef = MRI.getVRegDef(RHS);
5928 if (!isConstantOrConstantVector(*RHSDef, MRI))
5929 return false;
5930
5931 // Don't do this if the types are not going to be legal.
5932 if (LI) {
5933 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5934 return false;
// Either G_SMULH must be legal on DstTy, or a G_MUL on the double-width
// type must pass isLegalOrHasWidenScalar.
5935 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5936 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5937 return false;
// The remainder form additionally needs a G_SUB.
5938 if (Opcode == TargetOpcode::G_SREM &&
5939 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5940 return false;
5941 }
5942
// A null pointer or a zero divisor element rejects the match.
5943 return matchUnaryPredicate(
5944 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5945}
5946
// Builds the multiply-based replacement for MI and replaces MI's single
// result with the result register (operand 0) of the returned instruction.
5948 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5949 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5950}
5951
// Expands a signed division or remainder by a constant (scalar or
// build-vector) divisor into shifts and multiplies emitted through the
// member Builder. Returns the instruction computing the quotient for G_SDIV
// or the remainder for G_SREM. MI itself is not erased or rewritten here.
5953 unsigned Opcode = MI.getOpcode();
5954 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5955 Opcode == TargetOpcode::G_SREM);
5956 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5957 Register Dst = SDivorRem.getReg(0);
5958 Register LHS = SDivorRem.getReg(1);
5959 Register RHS = SDivorRem.getReg(2);
5960 LLT Ty = MRI.getType(Dst);
5961 LLT ScalarTy = Ty.getScalarType();
5962 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
// NOTE(review): ShiftAmtTy is declared on a line missing from this listing;
// presumably it is the shift-amount type chosen for Ty -- confirm.
5964 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5965 auto &MIB = Builder;
5966
// UseSRA becomes true as soon as any divisor element has trailing zero bits.
5967 bool UseSRA = false;
5968 SmallVector<Register, 16> ExactShifts, ExactFactors;
5969
5970 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5971 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5972
// Per-element callback for the exact-division path: records the number of
// trailing zero bits of the divisor as a shift amount and the multiplicative
// inverse of the arithmetically shifted divisor as a factor. Always returns
// true.
5973 auto BuildExactSDIVPattern = [&](const Constant *C) {
5974 // Don't recompute inverses for each splat element.
5975 if (IsSplat && !ExactFactors.empty()) {
5976 ExactShifts.push_back(ExactShifts[0]);
5977 ExactFactors.push_back(ExactFactors[0]);
5978 return true;
5979 }
5980
5981 auto *CI = cast<ConstantInt>(C);
5982 APInt Divisor = CI->getValue();
5983 unsigned Shift = Divisor.countr_zero();
5984 if (Shift) {
5985 Divisor.ashrInPlace(Shift);
5986 UseSRA = true;
5987 }
5988
5989 // Calculate the multiplicative inverse modulo BW.
5990 // 2^W requires W + 1 bits, so we have to extend and then truncate.
// NOTE(review): no extend/truncate is visible in the call below; the line
// above may be stale -- confirm against APInt::multiplicativeInverse.
5991 APInt Factor = Divisor.multiplicativeInverse();
5992 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5993 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5994 return true;
5995 };
5996
// Exact division: arithmetically shift out the divisor's trailing zeros
// (only if some element needs it), then multiply by the inverse.
5997 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5998 // Collect all magic values from the build vector.
5999 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
6000 (void)Matched;
6001 assert(Matched && "Expected unary predicate match to succeed");
6002
6003 Register Shift, Factor;
6004 if (Ty.isVector()) {
6005 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
6006 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
6007 } else {
6008 Shift = ExactShifts[0];
6009 Factor = ExactFactors[0];
6010 }
6011
6012 Register Res = LHS;
6013
6014 if (UseSRA)
6015 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
6016
6017 return MIB.buildMul(Ty, Res, Factor);
6018 }
6019
6020 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6021
// Per-element callback for the general path: records the magic multiplier,
// the numerator factor (-1, 0 or +1), the shift amount and the sign-bit
// mask (0 or all-ones) for one divisor element. Always returns true.
6022 auto BuildSDIVPattern = [&](const Constant *C) {
6023 auto *CI = cast<ConstantInt>(C);
6024 const APInt &Divisor = CI->getValue();
6025
// NOTE(review): the declaration of `Magics` is on lines missing from this
// listing.
6028 int NumeratorFactor = 0;
6029 int ShiftMask = -1;
6030
6031 if (Divisor.isOne() || Divisor.isAllOnes()) {
6032 // If d is +1/-1, we just multiply the numerator by +1/-1.
6033 NumeratorFactor = Divisor.getSExtValue();
6034 Magics.Magic = 0;
6035 Magics.ShiftAmount = 0;
6036 ShiftMask = 0;
6037 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
6038 // If d > 0 and m < 0, add the numerator.
6039 NumeratorFactor = 1;
6040 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
6041 // If d < 0 and m > 0, subtract the numerator.
6042 NumeratorFactor = -1;
6043 }
6044
6045 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
6046 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
6047 Shifts.push_back(
6048 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
6049 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
6050
6051 return true;
6052 };
6053
6054 // Collect the shifts/magic values from each element.
6055 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
6056 (void)Matched;
6057 assert(Matched && "Expected unary predicate match to succeed");
6058
6059 Register MagicFactor, Factor, Shift, ShiftMask;
6060 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
6061 if (RHSDef) {
6062 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
6063 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
6064 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
6065 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
6066 } else {
6067 assert(MRI.getType(RHS).isScalar() &&
6068 "Non-build_vector operation should have been a scalar");
6069 MagicFactor = MagicFactors[0];
6070 Factor = Factors[0];
6071 Shift = Shifts[0];
6072 ShiftMask = ShiftMasks[0];
6073 }
6074
6075 Register Q = LHS;
6076 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
6077
6078 // (Optionally) Add/subtract the numerator using Factor.
6079 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
6080 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
6081
6082 // Shift right algebraic by shift value.
6083 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
6084
6085 // Extract the sign bit, mask it and add it to the quotient.
6086 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
6087 auto T = MIB.buildLShr(Ty, Q, SignShift);
6088 T = MIB.buildAnd(Ty, T, ShiftMask);
6089 auto ret = MIB.buildAdd(Ty, Q, T);
6090
// Remainder is derived from the quotient: LHS - quotient * RHS.
6091 if (Opcode == TargetOpcode::G_SREM) {
6092 auto Prod = MIB.buildMul(Ty, ret, RHS);
6093 return MIB.buildSub(Ty, LHS, Prod);
6094 }
6095 return ret;
6096}
6097
// Returns true when every element of the divisor of a G_SDIV / G_UDIV is a
// ConstantInt that is a power of two, or (only when IsSigned is set) a
// negated power of two. Undef elements are not accepted.
// NOTE(review): IsSigned is not declared in the visible lines; presumably a
// parameter on the missing signature line -- confirm.
6099 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
6100 MI.getOpcode() == TargetOpcode::G_UDIV) &&
6101 "Expected SDIV or UDIV");
6102 auto &Div = cast<GenericMachineInstr>(MI);
6103 Register RHS = Div.getReg(2);
// dyn_cast yields null for a non-ConstantInt element, which rejects it.
6104 auto MatchPow2 = [&](const Constant *C) {
6105 auto *CI = dyn_cast<ConstantInt>(C);
6106 return CI && (CI->getValue().isPowerOf2() ||
6107 (IsSigned && CI->getValue().isNegatedPowerOf2()));
6108 };
6109 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
6110}
6111
// Replaces a G_SDIV by a power-of-two (or negated power-of-two) divisor
// with the shift/select sequence sketched in the comment below, writing the
// final value to MI's result register and erasing MI.
6113 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
6114 auto &SDiv = cast<GenericMachineInstr>(MI);
6115 Register Dst = SDiv.getReg(0);
6116 Register LHS = SDiv.getReg(1);
6117 Register RHS = SDiv.getReg(2);
6118 LLT Ty = MRI.getType(Dst);
// NOTE(review): ShiftAmtTy is declared on a line missing from this listing.
// CCVT is the 1-bit (per element) type used for the compare results.
6120 LLT CCVT = Ty.isVector() ? LLT::vector(Ty.getElementCount(), LLT::integer(1))
6121 : LLT::integer(1);
6122
6123 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
6124 // to the following version:
6125 //
6126 // %c1 = G_CTTZ %rhs
6127 // %inexact = G_SUB $bitwidth, %c1
6128 // %sign = %G_ASHR %lhs, $(bitwidth - 1)
6129 // %lshr = G_LSHR %sign, %inexact
6130 // %add = G_ADD %lhs, %lshr
6131 // %ashr = G_ASHR %add, %c1
6132 // %ashr = G_SELECT, %isoneorallones, %lhs, %ashr
6133 // %zero = G_CONSTANT $0
6134 // %neg = G_NEG %ashr
6135 // %isneg = G_ICMP SLT %rhs, %zero
6136 // %res = G_SELECT %isneg, %neg, %ashr
6137
6138 unsigned BitWidth = Ty.getScalarSizeInBits();
6139 auto Zero = Builder.buildConstant(Ty, 0);
6140
6141 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
6142 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6143 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
6144 // Splat the sign bit into the register
6145 auto Sign = Builder.buildAShr(
6146 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
6147
6148 // Add (LHS < 0) ? abs2 - 1 : 0;
6149 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
6150 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
6151 auto AShr = Builder.buildAShr(Ty, Add, C1);
6152
6153 // Special case: (sdiv X, 1) -> X
6154 // Special Case: (sdiv X, -1) -> 0-X
// For both divisors LHS is selected here; the -1 case is negated by the
// final select below.
6155 auto One = Builder.buildConstant(Ty, 1);
6156 auto MinusOne = Builder.buildConstant(Ty, -1);
6157 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
6158 auto IsMinusOne =
6159 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
6160 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
6161 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
6162
6163 // If divided by a positive value, we're done. Otherwise, the result must be
6164 // negated.
6165 auto Neg = Builder.buildNeg(Ty, AShr);
6166 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
6167 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
6168 MI.eraseFromParent();
6169}
6170
// Replaces a G_UDIV by a power-of-two divisor with a logical right shift of
// LHS by the divisor's count of trailing zeros (G_CTTZ), then erases MI.
6172 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
6173 auto &UDiv = cast<GenericMachineInstr>(MI);
6174 Register Dst = UDiv.getReg(0);
6175 Register LHS = UDiv.getReg(1);
6176 Register RHS = UDiv.getReg(2);
6177 LLT Ty = MRI.getType(Dst);
// NOTE(review): ShiftAmtTy is declared on a line missing from this listing.
6179
6180 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6181 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
6182 MI.eraseFromParent();
6183}
6184
// Replaces a G_SREM by a +/- power-of-two divisor with the bias-and-mask
// sequence described below, writing the result to Dst and erasing MI.
6186 assert(MI.getOpcode() == TargetOpcode::G_SREM && "Expected SREM");
6187 auto &SRem = cast<GBinOp>(MI);
6188 Register Dst = SRem.getReg(0);
6189 Register LHS = SRem.getLHSReg();
6190 Register RHS = SRem.getRHSReg();
6191 LLT Ty = MRI.getType(Dst);
// NOTE(review): ShiftAmtTy is declared on a line missing from this listing.
6193
6194 // Effectively we want to lower G_SREM %lhs, %rhs, where %rhs is +/- a power
6195 // of 2, to the following branch-free bias-and-mask version:
6196 //
6197 // %abs = G_ABS %rhs
6198 // %mask = G_SUB %abs, 1
6199 // %sign = G_ASHR %lhs, $(bitwidth - 1)
6200 // %bias = G_AND %sign, %mask
6201 // %biased = G_ADD %lhs, %bias
6202 // %masked = G_AND %biased, %mask
6203 // %res = G_SUB %masked, %bias
6204 //
6205 // The bias adds (|%rhs| - 1) for negative %lhs, correcting rounding towards
6206 // zero (instead of towards -inf that a plain mask would give). Constant
6207 // divisors collapse %mask to a single G_CONSTANT via the CSEMIRBuilder folds
6208 // for G_ABS and G_SUB.
6209
6210 unsigned BitWidth = Ty.getScalarSizeInBits();
6211 auto AbsRHS = Builder.buildAbs(Ty, RHS);
6212 auto Mask = Builder.buildSub(Ty, AbsRHS, Builder.buildConstant(Ty, 1));
6213 auto BWMinusOne = Builder.buildConstant(ShiftAmtTy, BitWidth - 1);
6214 auto Sign = Builder.buildAShr(Ty, LHS, BWMinusOne);
6215 auto Bias = Builder.buildAnd(Ty, Sign, Mask);
6216 auto Biased = Builder.buildAdd(Ty, LHS, Bias);
6217 auto Masked = Builder.buildAnd(Ty, Biased, Mask);
6218 Builder.buildSub(Dst, Masked, Bias);
6219 MI.eraseFromParent();
6220}
6221
// Returns true when the second source operand of a G_UMULH is a constant
// (or constant vector) whose every element is a power of two other than
// one, and the G_LSHR / G_CTLZ operations needed by the rewrite pass the
// legality queries below.
6223 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
6224 Register RHS = MI.getOperand(2).getReg();
6225 Register Dst = MI.getOperand(0).getReg();
6226 LLT Ty = MRI.getType(Dst);
6227 LLT RHSTy = MRI.getType(RHS);
// NOTE(review): ShiftAmtTy is declared on a line missing from this listing.
6229 auto MatchPow2ExceptOne = [&](const Constant *C) {
6230 if (auto *CI = dyn_cast<ConstantInt>(C))
6231 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
6232 return false;
6233 };
6234 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
6235 return false;
6236 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
6237 // get log base 2, and it is not always legal on a target.
6238 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
6239 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
6240}
6241
// Rewrites MI as a logical right shift of LHS by
// (element bit width - log2(RHS)), writing the result to Dst, then erases
// MI.
6243 Register LHS = MI.getOperand(1).getReg();
6244 Register RHS = MI.getOperand(2).getReg();
6245 Register Dst = MI.getOperand(0).getReg();
6246 LLT Ty = MRI.getType(Dst);
// NOTE(review): ShiftAmtTy is declared on a line missing from this listing.
6248 unsigned NumEltBits = Ty.getScalarSizeInBits();
6249
6250 auto LogBase2 = buildLogBase2(RHS, Builder);
6251 auto ShiftAmt =
6252 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
// The amount is computed in Ty and then zero-extended or truncated to the
// shift-amount type.
6253 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
6254 Builder.buildLShr(Dst, LHS, Trunc);
6255 MI.eraseFromParent();
6256}
6257
6259 Register &MatchInfo) const {
// Matches a narrowing instruction whose source is a signed clamp
// (smin/smax in either nesting order) to the signed range of the
// destination element type. On a match, MatchInfo receives the unclamped
// value.
6260 Register Dst = MI.getOperand(0).getReg();
6261 Register Src = MI.getOperand(1).getReg();
6262 LLT DstTy = MRI.getType(Dst);
6263 LLT SrcTy = MRI.getType(Src);
6264 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6265 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6266 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6267
// NOTE(review): the head of this legality check is on a line missing from
// this listing.
6269 {TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6270 return false;
6271
// Destination-width signed bounds, sign-extended to the source width so
// they can be compared with the clamp constants.
6272 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6273 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
6274 return mi_match(Src, MRI,
6275 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6276 m_SpecificICstOrSplat(SignedMin)),
6277 m_SpecificICstOrSplat(SignedMax))) ||
6278 mi_match(Src, MRI,
6279 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6280 m_SpecificICstOrSplat(SignedMax)),
6281 m_SpecificICstOrSplat(SignedMin)));
6282}
6283
6285 Register &MatchInfo) const {
// Builds the signed-saturating truncate from MatchInfo into MI's result
// register, then erases MI.
6286 Register Dst = MI.getOperand(0).getReg();
6287 Builder.buildTruncSSatS(Dst, MatchInfo);
6288 MI.eraseFromParent();
6289}
6290
6292 Register &MatchInfo) const {
// Matches a narrowing instruction whose source is a clamp to the range
// [0, unsigned max of the destination element type]. Three clamp shapes are
// tried; on a match, MatchInfo receives the unclamped value.
6293 Register Dst = MI.getOperand(0).getReg();
6294 Register Src = MI.getOperand(1).getReg();
6295 LLT DstTy = MRI.getType(Dst);
6296 LLT SrcTy = MRI.getType(Src);
6297 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6298 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6299 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6300
// NOTE(review): the head of this legality check is on a line missing from
// this listing.
6302 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6303 return false;
// Destination-width unsigned maximum, zero-extended to the source width.
// NOTE(review): the inner patterns of the first and third alternatives are
// on lines missing from this listing.
6304 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6305 return mi_match(Src, MRI,
6307 m_SpecificICstOrSplat(UnsignedMax))) ||
6308 mi_match(Src, MRI,
6309 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6310 m_SpecificICstOrSplat(UnsignedMax)),
6311 m_SpecificICstOrSplat(0))) ||
6312 mi_match(Src, MRI,
6314 m_SpecificICstOrSplat(UnsignedMax)));
6315}
6316
6318 Register &MatchInfo) const {
// Builds the saturating truncate (buildTruncSSatU) from MatchInfo into MI's
// result register, then erases MI.
6319 Register Dst = MI.getOperand(0).getReg();
6320 Builder.buildTruncSSatU(Dst, MatchInfo);
6321 MI.eraseFromParent();
6322}
6323
6325 MachineInstr &MinMI) const {
// Returns true when MinMI's second source operand is the constant (or
// splat) equal to the unsigned maximum of MI's destination element type,
// and MinMI's first source operand is not itself defined by a signed max.
6326 Register Min = MinMI.getOperand(2).getReg();
6327 Register Val = MinMI.getOperand(1).getReg();
6328 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6329 LLT SrcTy = MRI.getType(Val);
6330 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6331 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6332 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6333
// NOTE(review): the head of this legality check is on a line missing from
// this listing.
6335 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6336 return false;
6337 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6338 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6339 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6340}
6341
6343 MachineInstr &SrcMI) const {
// Returns true only when legalizer info is available (LI is non-null) and
// G_FPTOUI_SAT from SrcMI's first source type to MI's destination type
// passes isLegalOrBeforeLegalizer.
6344 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6345 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6346
6347 return LI &&
6348 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6349}
6350
6352 BuildFnTy &MatchInfo) const {
// Removes redundant G_FNEGs on the operands of a floating-point arithmetic
// instruction, switching between G_FADD and G_FSUB where that absorbs the
// negation. On a match, MatchInfo is set to a callback that rewrites MI in
// place (opcode plus operands 1 and 2) and true is returned.
6353 unsigned Opc = MI.getOpcode();
6354 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6355 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6356 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6357
6358 Register Dst = MI.getOperand(0).getReg();
6359 Register X = MI.getOperand(1).getReg();
6360 Register Y = MI.getOperand(2).getReg();
6361 LLT Type = MRI.getType(Dst);
6362
6363 // fold (fadd x, fneg(y)) -> (fsub x, y)
6364 // fold (fadd fneg(y), x) -> (fsub x, y)
6365 // G_FADD is commutative so both cases are checked by m_GFAdd
6366 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6367 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6368 Opc = TargetOpcode::G_FSUB;
6369 }
6370 /// fold (fsub x, fneg(y)) -> (fadd x, y)
6371 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6372 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6373 Opc = TargetOpcode::G_FADD;
6374 }
6375 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6376 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6377 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6378 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
// Here X and Y are both the matched registers and the capture targets, so
// on success they are rebound to the un-negated sources.
6379 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6380 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6381 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6382 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6383 // no opcode change
6384 } else
6385 return false;
6386
// Opc, X and Y are captured by value; MI is captured by reference and is
// mutated in place between the observer notifications.
6387 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6388 Observer.changingInstr(MI);
6389 MI.setDesc(B.getTII().get(Opc));
6390 MI.getOperand(1).setReg(X);
6391 MI.getOperand(2).setReg(Y);
6392 Observer.changedInstr(MI);
6393 };
6394 return true;
6395}
6396
6398 Register &MatchInfo) const {
// Matches a G_FSUB whose left operand is a floating-point zero constant (or
// splat). MatchInfo is set to the right operand unconditionally, even when
// false is returned.
6399 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6400
6401 Register LHS = MI.getOperand(1).getReg();
6402 MatchInfo = MI.getOperand(2).getReg();
6403 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6404
// NOTE(review): the scalar arm of this conditional is on a line missing
// from this listing.
6405 const auto LHSCst = Ty.isVector()
6406 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6408 if (!LHSCst)
6409 return false;
6410
6411 // -0.0 is always allowed
6412 if (LHSCst->Value.isNegZero())
6413 return true;
6414
6415 // +0.0 is only allowed if nsz is set.
6416 if (LHSCst->Value.isPosZero())
6417 return MI.getFlag(MachineInstr::FmNsz);
6418
6419 return false;
6420}
6421
6423 Register &MatchInfo) const {
// Writes fneg(fcanonicalize(MatchInfo)) to MI's result register and erases
// MI via eraseInst.
6424 Register Dst = MI.getOperand(0).getReg();
6425 Builder.buildFNeg(
6426 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6427 eraseInst(MI);
6428}
6429
6430/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6431/// due to global flags or MachineInstr flags.
6432static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6433 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6434 return false;
6435 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6436}
6437
6438static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6439 const MachineRegisterInfo &MRI) {
6440 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6441 MRI.use_instr_nodbg_end()) >
6442 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6443 MRI.use_instr_nodbg_end());
6444}
6445
6447 bool &AllowFusionGlobally,
6448 bool &HasFMAD, bool &Aggressive,
6449 bool CanReassociate) const {
// Common precondition check for the multiply-add fusion matchers. Returns
// true when MI may be fused and fills the three out-parameters:
//   AllowFusionGlobally - fast FP-op fusion is enabled or G_FMAD is usable.
//   HasFMAD             - G_FMAD is usable (after legalization only).
//   Aggressive          - the target asks for aggressive FMA fusion.
// On an early false return the out-parameters written so far keep their
// values and the rest are left untouched.
6450
6451 auto *MF = MI.getMF();
6452 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6453 const TargetOptions &Options = MF->getTarget().Options;
6454 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6455
// When the caller asks for a reassociating fold, MI must carry FmReassoc.
6456 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6457 return false;
6458
6459 // Floating-point multiply-add with intermediate rounding.
6460 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6461 // Floating-point multiply-add without intermediate rounding.
6462 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6463 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6464 // No valid opcode, do not combine.
6465 if (!HasFMAD && !HasFMA)
6466 return false;
6467
6468 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6469 // If the addition is not contractable, do not combine.
6470 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6471 return false;
6472
6473 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6474 return true;
6475}
6476
6479 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Matches a G_FADD with a contractable G_FMUL operand and, on success, sets
// MatchInfo to a callback that builds the fused multiply-add (G_FMAD when
// available, otherwise G_FMA) into MI's result register.
6480 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6481
6482 bool AllowFusionGlobally, HasFMAD, Aggressive;
6483 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6484 return false;
6485
6486 Register Op1 = MI.getOperand(1).getReg();
6487 Register Op2 = MI.getOperand(2).getReg();
6488 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6489 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6490 unsigned PreferredFusedOpcode =
6491 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6492
6493 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6494 // prefer to fold the multiply with fewer uses.
6495 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6496 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6497 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6498 std::swap(LHS, RHS);
6499 }
6500
6501 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
// Unless fusion is aggressive, the multiply result must have exactly one
// non-debug use.
6502 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6503 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6504 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6505 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6506 {LHS.MI->getOperand(1).getReg(),
6507 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6508 };
6509 return true;
6510 }
6511
6512 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6513 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6514 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6515 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6516 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6517 {RHS.MI->getOperand(1).getReg(),
6518 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6519 };
6520 return true;
6521 }
6522
6523 return false;
6524}
6525
6528 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Matches a G_FADD with an operand that is an fpext of a contractable
// G_FMUL. On success, MatchInfo is set to a callback that extends both
// multiply operands to the destination type and builds the fused
// multiply-add (G_FMAD when available, otherwise G_FMA) into MI's result
// register.
6529 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6530
6531 bool AllowFusionGlobally, HasFMAD, Aggressive;
6532 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6533 return false;
6534
6535 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6536 Register Op1 = MI.getOperand(1).getReg();
6537 Register Op2 = MI.getOperand(2).getReg();
6538 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6539 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6540 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6541
6542 unsigned PreferredFusedOpcode =
6543 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6544
6545 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6546 // prefer to fold the multiply with fewer uses.
6547 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6548 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6549 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6550 std::swap(LHS, RHS);
6551 }
6552
6553 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
// The target must also report the fpext as foldable into the fused opcode
// for these source/destination types.
6554 MachineInstr *FpExtSrc;
6555 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6556 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6557 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6558 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6559 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6560 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6561 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6562 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6563 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6564 };
6565 return true;
6566 }
6567
6568 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6569 // Note: Commutes FADD operands.
6570 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6571 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6572 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6573 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6574 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6575 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6576 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6577 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6578 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6579 };
6580 return true;
6581 }
6582
6583 return false;
6584}
6585
6588 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6589 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6590
6591 bool AllowFusionGlobally, HasFMAD, Aggressive;
6592 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6593 return false;
6594
6595 Register Op1 = MI.getOperand(1).getReg();
6596 Register Op2 = MI.getOperand(2).getReg();
6597 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6598 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6599 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6600
6601 unsigned PreferredFusedOpcode =
6602 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6603
6604 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6605 // prefer to fold the multiply with fewer uses.
6606 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6607 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6608 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6609 std::swap(LHS, RHS);
6610 }
6611
6612 MachineInstr *FMA = nullptr;
6613 Register Z;
6614 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6615 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6616 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6617 TargetOpcode::G_FMUL) &&
6618 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6619 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6620 FMA = LHS.MI;
6621 Z = RHS.Reg;
6622 }
6623 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6624 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6625 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6626 TargetOpcode::G_FMUL) &&
6627 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6628 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6629 Z = LHS.Reg;
6630 FMA = RHS.MI;
6631 }
6632
6633 if (FMA) {
6634 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6635 Register X = FMA->getOperand(1).getReg();
6636 Register Y = FMA->getOperand(2).getReg();
6637 Register U = FMulMI->getOperand(1).getReg();
6638 Register V = FMulMI->getOperand(2).getReg();
6639
6640 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6641 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6642 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6643 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6644 {X, Y, InnerFMA});
6645 };
6646 return true;
6647 }
6648
6649 return false;
6650}
6651
6654 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6655 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6656
6657 bool AllowFusionGlobally, HasFMAD, Aggressive;
6658 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6659 return false;
6660
6661 if (!Aggressive)
6662 return false;
6663
6664 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6665 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6666 Register Op1 = MI.getOperand(1).getReg();
6667 Register Op2 = MI.getOperand(2).getReg();
6668 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6669 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6670
6671 unsigned PreferredFusedOpcode =
6672 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6673
6674 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6675 // prefer to fold the multiply with fewer uses.
6676 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6677 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6678 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6679 std::swap(LHS, RHS);
6680 }
6681
6682 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
6683 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6685 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6686 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6687 Register InnerFMA =
6688 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6689 .getReg(0);
6690 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6691 {X, Y, InnerFMA});
6692 };
6693
6694 MachineInstr *FMulMI, *FMAMI;
6695 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6696 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6697 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6698 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6699 m_GFPExt(m_MInstr(FMulMI))) &&
6700 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6701 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6702 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6703 MatchInfo = [=](MachineIRBuilder &B) {
6704 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6705 FMulMI->getOperand(2).getReg(), RHS.Reg,
6706 LHS.MI->getOperand(1).getReg(),
6707 LHS.MI->getOperand(2).getReg(), B);
6708 };
6709 return true;
6710 }
6711
6712 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6713 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6714 // FIXME: This turns two single-precision and one double-precision
6715 // operation into two double-precision operations, which might not be
6716 // interesting for all targets, especially GPUs.
6717 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6718 FMAMI->getOpcode() == PreferredFusedOpcode) {
6719 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6720 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6721 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6722 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6723 MatchInfo = [=](MachineIRBuilder &B) {
6724 Register X = FMAMI->getOperand(1).getReg();
6725 Register Y = FMAMI->getOperand(2).getReg();
6726 X = B.buildFPExt(DstType, X).getReg(0);
6727 Y = B.buildFPExt(DstType, Y).getReg(0);
6728 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6729 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6730 };
6731
6732 return true;
6733 }
6734 }
6735
6736 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
6737 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6738 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6739 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6740 m_GFPExt(m_MInstr(FMulMI))) &&
6741 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6742 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6743 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6744 MatchInfo = [=](MachineIRBuilder &B) {
6745 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6746 FMulMI->getOperand(2).getReg(), LHS.Reg,
6747 RHS.MI->getOperand(1).getReg(),
6748 RHS.MI->getOperand(2).getReg(), B);
6749 };
6750 return true;
6751 }
6752
6753 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
6754 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6755 // FIXME: This turns two single-precision and one double-precision
6756 // operation into two double-precision operations, which might not be
6757 // interesting for all targets, especially GPUs.
6758 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6759 FMAMI->getOpcode() == PreferredFusedOpcode) {
6760 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6761 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6762 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6763 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6764 MatchInfo = [=](MachineIRBuilder &B) {
6765 Register X = FMAMI->getOperand(1).getReg();
6766 Register Y = FMAMI->getOperand(2).getReg();
6767 X = B.buildFPExt(DstType, X).getReg(0);
6768 Y = B.buildFPExt(DstType, Y).getReg(0);
6769 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6770 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6771 };
6772 return true;
6773 }
6774 }
6775
6776 return false;
6777}
6778
6781 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6782 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6783
6784 bool AllowFusionGlobally, HasFMAD, Aggressive;
6785 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6786 return false;
6787
6788 Register Op1 = MI.getOperand(1).getReg();
6789 Register Op2 = MI.getOperand(2).getReg();
6790 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6791 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6792 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6793
6794 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6795 // prefer to fold the multiply with fewer uses.
6796 int FirstMulHasFewerUses = true;
6797 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6798 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6799 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6800 FirstMulHasFewerUses = false;
6801
6802 unsigned PreferredFusedOpcode =
6803 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6804
6805 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6806 if (FirstMulHasFewerUses &&
6807 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6808 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6809 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6810 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6811 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6812 {LHS.MI->getOperand(1).getReg(),
6813 LHS.MI->getOperand(2).getReg(), NegZ});
6814 };
6815 return true;
6816 }
6817 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6818 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6819 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6820 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6821 Register NegY =
6822 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6823 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6824 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6825 };
6826 return true;
6827 }
6828
6829 return false;
6830}
6831
6834 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6835 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6836
6837 bool AllowFusionGlobally, HasFMAD, Aggressive;
6838 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6839 return false;
6840
6841 Register LHSReg = MI.getOperand(1).getReg();
6842 Register RHSReg = MI.getOperand(2).getReg();
6843 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6844
6845 unsigned PreferredFusedOpcode =
6846 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6847
6848 MachineInstr *FMulMI;
6849 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6850 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6851 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6852 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6853 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6854 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6855 Register NegX =
6856 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6857 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6858 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6859 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6860 };
6861 return true;
6862 }
6863
6864 // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
6865 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6866 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6867 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6868 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6869 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6870 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6871 {FMulMI->getOperand(1).getReg(),
6872 FMulMI->getOperand(2).getReg(), LHSReg});
6873 };
6874 return true;
6875 }
6876
6877 return false;
6878}
6879
6882 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6883 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6884
6885 bool AllowFusionGlobally, HasFMAD, Aggressive;
6886 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6887 return false;
6888
6889 Register LHSReg = MI.getOperand(1).getReg();
6890 Register RHSReg = MI.getOperand(2).getReg();
6891 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6892
6893 unsigned PreferredFusedOpcode =
6894 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6895
6896 MachineInstr *FMulMI;
6897 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6898 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6899 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6900 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6901 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6902 Register FpExtX =
6903 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6904 Register FpExtY =
6905 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6906 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6907 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6908 {FpExtX, FpExtY, NegZ});
6909 };
6910 return true;
6911 }
6912
6913 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6914 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6915 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6916 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6917 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6918 Register FpExtY =
6919 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6920 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6921 Register FpExtZ =
6922 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6923 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6924 {NegY, FpExtZ, LHSReg});
6925 };
6926 return true;
6927 }
6928
6929 return false;
6930}
6931
6934 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6935 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6936
6937 bool AllowFusionGlobally, HasFMAD, Aggressive;
6938 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6939 return false;
6940
6941 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6942 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6943 Register LHSReg = MI.getOperand(1).getReg();
6944 Register RHSReg = MI.getOperand(2).getReg();
6945
6946 unsigned PreferredFusedOpcode =
6947 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6948
6949 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6951 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6952 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6953 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6954 };
6955
6956 MachineInstr *FMulMI;
6957 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6958 // (fneg (fma (fpext x), (fpext y), z))
6959 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6960 // (fneg (fma (fpext x), (fpext y), z))
6961 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6962 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6963 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6964 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6965 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6966 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6967 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6968 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6969 FMulMI->getOperand(2).getReg(), RHSReg, B);
6970 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6971 };
6972 return true;
6973 }
6974
6975 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6976 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6977 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6978 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6979 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6980 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6981 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6982 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6983 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6984 FMulMI->getOperand(2).getReg(), LHSReg, B);
6985 };
6986 return true;
6987 }
6988
6989 return false;
6990}
6991
6993 unsigned &IdxToPropagate) const {
6994 bool PropagateNaN;
6995 switch (MI.getOpcode()) {
6996 default:
6997 return false;
6998 case TargetOpcode::G_FMINNUM:
6999 case TargetOpcode::G_FMAXNUM:
7000 PropagateNaN = false;
7001 break;
7002 case TargetOpcode::G_FMINIMUM:
7003 case TargetOpcode::G_FMAXIMUM:
7004 PropagateNaN = true;
7005 break;
7006 }
7007
7008 auto MatchNaN = [&](unsigned Idx) {
7009 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
7010 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
7011 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
7012 return false;
7013 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
7014 return true;
7015 };
7016
7017 return MatchNaN(1) || MatchNaN(2);
7018}
7019
7020// Combine multiple FDIVs with the same divisor into multiple FMULs by the
7021// reciprocal.
7022// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
7024 MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
7025 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
7026
7027 Register X = MI.getOperand(1).getReg();
7028 Register Y = MI.getOperand(2).getReg();
7029
7030 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
7031 return false;
7032
7033 auto IsOne = [this](Register X) {
7034 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
7035 return N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0));
7036 };
7037
7038 // Skip if current node is a reciprocal/fneg-reciprocal.
7039 if (IsOne(X))
7040 return false;
7041
7042 // Exit early if the target does not want this transform or if there can't
7043 // possibly be enough uses of the divisor to make the transform worthwhile.
7044 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
7045 if (!MinUses)
7046 return false;
7047
7048 // Find all FDIV users of the same divisor. For the moment we limit all
7049 // instructions to a single BB and use the first Instr in MatchInfo as the
7050 // dominating position.
7051 MatchInfo.push_back(&MI);
7052 for (auto &U : MRI.use_nodbg_instructions(Y)) {
7053 if (&U == &MI || U.getParent() != MI.getParent())
7054 continue;
7055 if (U.getOpcode() == TargetOpcode::G_FDIV &&
7056 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y &&
7057 !IsOne(U.getOperand(1).getReg())) {
7058 // This division is eligible for optimization only if global unsafe math
7059 // is enabled or if this division allows reciprocal formation.
7060 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
7061 MatchInfo.push_back(&U);
7062 if (dominates(U, *MatchInfo[0]))
7063 std::swap(MatchInfo[0], MatchInfo.back());
7064 }
7065 }
7066 }
7067
7068 // Now that we have the actual number of divisor uses, make sure it meets
7069 // the minimum threshold specified by the target.
7070 return MatchInfo.size() >= MinUses;
7071}
7072
7074 SmallVector<MachineInstr *> &MatchInfo) const {
7075 // Generate the new div at the position of the first instruction, that we have
7076 // ensured will dominate all other instructions.
7077 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
7078 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
7079 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
7080 MatchInfo[0]->getOperand(2).getReg(),
7081 MatchInfo[0]->getFlags());
7082
7083 // Replace all found div's with fmul instructions.
7084 for (MachineInstr *MI : MatchInfo) {
7085 Builder.setInsertPt(*MI->getParent(), MI);
7086 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
7087 Div->getOperand(0).getReg(), MI->getFlags());
7088 MI->eraseFromParent();
7089 }
7090}
7091
7093 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
7094 Register LHS = MI.getOperand(1).getReg();
7095 Register RHS = MI.getOperand(2).getReg();
7096
7097 // Helper lambda to check for opportunities for
7098 // A + (B - A) -> B
7099 // (B - A) + A -> B
7100 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
7101 Register Reg;
7102 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
7103 Reg == MaybeSameReg;
7104 };
7105 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
7106}
7107
7109 Register &MatchInfo) const {
// On success MatchInfo holds the register x that the whole build-vector can
// be replaced with; the function only returns true when x has exactly the
// destination vector type.
7110 // This combine folds the following patterns:
7111 //
7112 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
7113 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
7114 // into
7115 // x
7116 // if
7117 // k == sizeof(VecEltTy)/2
7118 // type(x) == type(dst)
7119 //
7120 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
7121 // into
7122 // x
7123 // if
7124 // type(x) == type(dst)
7125
// Type of the vector being built and of one of its elements.
7126 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
7127 LLT DstEltTy = DstVecTy.getElementType();
7128
// Sources feeding the low and the high element once a pattern has matched.
7129 Register Lo, Hi;
7130
7131 if (mi_match(
7132 MI, MRI,
// NOTE(review): the matcher argument of this mi_match call is not present in
// this copy of the file. Presumably it is the pattern for the second form
// described above (G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)) binding Lo —
// confirm against the upstream source before building.
7134 MatchInfo = Lo;
7135 return MRI.getType(MatchInfo) == DstVecTy;
7136 }
7137
// First form: the low element is a bitcast of Lo and the high element is a
// bitcast of Hi logically shifted right by a constant amount.
7138 std::optional<ValueAndVReg> ShiftAmount;
7139 const auto LoPattern = m_GBitcast(m_Reg(Lo));
7140 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
7141 if (mi_match(
7142 MI, MRI,
7143 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
7144 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
// Both halves must come from the same register and the shift must equal the
// bit size of a destination element.
// NOTE(review): the header comment states k == sizeof(VecEltTy)/2 while the
// code compares against the full size of DstEltTy; presumably VecEltTy there
// refers to a different (wider) type — confirm.
7145 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
7146 MatchInfo = Lo;
7147 return MRI.getType(MatchInfo) == DstVecTy;
7148 }
7149 }
7150
7151 return false;
7152}
7153
7155 Register &MatchInfo) const {
7156 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
7157 // if type(x) == type(G_TRUNC)
7158 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7159 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
7160 return false;
7161
7162 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
7163}
7164
7166 Register &MatchInfo) const {
// On success MatchInfo holds the register (y below) that can replace the
// G_TRUNC result.
7167 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
7168 // y if K == size of vector element type
7169 std::optional<ValueAndVReg> ShiftAmt;
7170 if (!mi_match(MI.getOperand(1).getReg(), MRI,
// NOTE(review): the first part of the pattern is not present in this copy of
// the file. Presumably it is the G_LSHR of a G_BITCAST of a G_BUILD_VECTOR
// that binds MatchInfo to the second build-vector source, as described in the
// comment above — confirm against the upstream source before building.
7172 m_GCst(ShiftAmt))))
7173 return false;
7174
// The shift amount must equal the bit size of the matched register's type,
// and that type must be the type of the G_TRUNC result.
7175 LLT MatchTy = MRI.getType(MatchInfo);
7176 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
7177 MatchTy == MRI.getType(MI.getOperand(0).getReg());
7178}
7179
7180unsigned CombinerHelper::getFPMinMaxOpcForSelect(
7181 CmpInst::Predicate Pred, LLT DstTy,
7182 SelectPatternNaNBehaviour VsNaNRetVal) const {
7183 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
7184 "Expected a NaN behaviour?");
7185 // Choose an opcode based off of legality or the behaviour when one of the
7186 // LHS/RHS may be NaN.
7187 switch (Pred) {
7188 default:
7189 return 0;
7190 case CmpInst::FCMP_UGT:
7191 case CmpInst::FCMP_UGE:
7192 case CmpInst::FCMP_OGT:
7193 case CmpInst::FCMP_OGE:
7194 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7195 return TargetOpcode::G_FMAXNUM;
7196 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7197 return TargetOpcode::G_FMAXIMUM;
7198 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
7199 return TargetOpcode::G_FMAXNUM;
7200 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
7201 return TargetOpcode::G_FMAXIMUM;
7202 return 0;
7203 case CmpInst::FCMP_ULT:
7204 case CmpInst::FCMP_ULE:
7205 case CmpInst::FCMP_OLT:
7206 case CmpInst::FCMP_OLE:
7207 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7208 return TargetOpcode::G_FMINNUM;
7209 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7210 return TargetOpcode::G_FMINIMUM;
7211 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
7212 return TargetOpcode::G_FMINNUM;
7213 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
7214 return 0;
7215 return TargetOpcode::G_FMINIMUM;
7216 }
7217}
7218
7219CombinerHelper::SelectPatternNaNBehaviour
7220CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
7221 bool IsOrderedComparison) const {
7222 bool LHSSafe = VT->isKnownNeverNaN(LHS);
7223 bool RHSSafe = VT->isKnownNeverNaN(RHS);
7224 // Completely unsafe.
7225 if (!LHSSafe && !RHSSafe)
7226 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
7227 if (LHSSafe && RHSSafe)
7228 return SelectPatternNaNBehaviour::RETURNS_ANY;
7229 // An ordered comparison will return false when given a NaN, so it
7230 // returns the RHS.
7231 if (IsOrderedComparison)
7232 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
7233 : SelectPatternNaNBehaviour::RETURNS_OTHER;
7234 // An unordered comparison will return true when given a NaN, so it
7235 // returns the LHS.
7236 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
7237 : SelectPatternNaNBehaviour::RETURNS_NAN;
7238}
7239
/// Tries to turn a select whose condition \p Cond is a floating-point compare
/// of \p TrueVal and \p FalseVal (in either order) into a single min/max
/// instruction. On success stores in \p MatchInfo a callback that builds the
/// chosen opcode with the compare operands as sources and \p Dst as result,
/// and returns true.
7240bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
7241 Register TrueVal, Register FalseVal,
7242 BuildFnTy &MatchInfo) const {
7243 // Match: select (fcmp cond x, y) x, y
7244 // select (fcmp cond x, y) y, x
7245 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
7246 LLT DstTy = MRI.getType(Dst);
7247 // Bail out early on pointers, since we'll never want to fold to a min/max.
7248 if (DstTy.isPointer())
7249 return false;
7250 // Match a floating point compare with a less-than/greater-than predicate.
7251 // TODO: Allow multiple users of the compare if they are all selects.
7252 CmpInst::Predicate Pred;
7253 Register CmpLHS, CmpRHS;
7254 if (!mi_match(Cond, MRI,
// NOTE(review): one line of this pattern is not present in this copy of the
// file (the parentheses below close one more call than is opened above).
// Presumably it is a single-use wrapper around the G_FCMP matcher, in line
// with the TODO above — confirm against the upstream source before building.
7256 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
7257 CmpInst::isEquality(Pred))
7258 return false;
// Work out what the select yields when one compare operand is NaN; give up
// if neither operand is known to be NaN-free.
7259 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
7260 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
7261 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
7262 return false;
// The select operands are the reverse of the compare operands: canonicalise
// by swapping the compare operands and predicate, which also flips which
// operand is returned against a NaN.
7263 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
7264 std::swap(CmpLHS, CmpRHS);
7265 Pred = CmpInst::getSwappedPredicate(Pred);
7266 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
7267 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
7268 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
7269 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
7270 }
// After the optional swap the select must pick exactly the compare operands.
7271 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
7272 return false;
7273 // Decide what type of max/min this should be based off of the predicate.
7274 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
// An opcode of 0 means no suitable min/max exists; the chosen opcode must
// also be legal for DstTy.
7275 if (!Opc || !isLegal({Opc, {DstTy}}))
7276 return false;
7277 // Comparisons between signed zero and zero may have different results...
7278 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
7279 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
7280 // We don't know if a comparison between two 0s will give us a consistent
7281 // result. Be conservative and only proceed if at least one side is
7282 // non-zero.
7283 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
7284 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
7285 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
7286 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
7287 return false;
7288 }
7289 }
// Everything the callback needs is captured by value.
7290 MatchInfo = [=](MachineIRBuilder &B) {
7291 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
7292 };
7293 return true;
7294}
7295
7297 BuildFnTy &MatchInfo) const {
7298 // TODO: Handle integer cases.
7299 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
7300 // Condition may be fed by a truncated compare.
7301 Register Cond = MI.getOperand(1).getReg();
7302 Register MaybeTrunc;
7303 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
7304 Cond = MaybeTrunc;
7305 Register Dst = MI.getOperand(0).getReg();
7306 Register TrueVal = MI.getOperand(2).getReg();
7307 Register FalseVal = MI.getOperand(3).getReg();
7308 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
7309}
7310
7312 BuildFnTy &MatchInfo) const {
7313 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
7314 // (X + Y) == X --> Y == 0
7315 // (X + Y) != X --> Y != 0
7316 // (X - Y) == X --> Y == 0
7317 // (X - Y) != X --> Y != 0
7318 // (X ^ Y) == X --> Y == 0
7319 // (X ^ Y) != X --> Y != 0
7320 Register Dst = MI.getOperand(0).getReg();
7321 CmpInst::Predicate Pred;
7322 Register X, Y, OpLHS, OpRHS;
7323 bool MatchedSub = mi_match(
7324 Dst, MRI,
7325 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
7326 if (MatchedSub && X != OpLHS)
7327 return false;
7328 if (!MatchedSub) {
7329 if (!mi_match(Dst, MRI,
7330 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7331 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7332 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7333 return false;
7334 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7335 }
7336 MatchInfo = [=](MachineIRBuilder &B) {
7337 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7338 B.buildICmp(Pred, Dst, Y, Zero);
7339 };
7340 return CmpInst::isEquality(Pred) && Y.isValid();
7341}
7342
7343/// Return the minimum useless shift amount that results in complete loss of the
7344/// source value. Return std::nullopt when it cannot determine a value.
7345static std::optional<unsigned>
7346getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7347 std::optional<int64_t> &Result) {
7348 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7349 Opcode == TargetOpcode::G_ASHR) &&
7350 "Expect G_SHL, G_LSHR or G_ASHR.");
7351 auto SignificantBits = 0;
7352 switch (Opcode) {
7353 case TargetOpcode::G_SHL:
7354 SignificantBits = ValueKB.countMinTrailingZeros();
7355 Result = 0;
7356 break;
7357 case TargetOpcode::G_LSHR:
7358 Result = 0;
7359 SignificantBits = ValueKB.countMinLeadingZeros();
7360 break;
7361 case TargetOpcode::G_ASHR:
7362 if (ValueKB.isNonNegative()) {
7363 SignificantBits = ValueKB.countMinLeadingZeros();
7364 Result = 0;
7365 } else if (ValueKB.isNegative()) {
7366 SignificantBits = ValueKB.countMinLeadingOnes();
7367 Result = -1;
7368 } else {
7369 // Cannot determine shift result.
7370 Result = std::nullopt;
7371 }
7372 break;
7373 default:
7374 break;
7375 }
7376 return ValueKB.getBitWidth() - SignificantBits;
7377}
7378
7380 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
7381 Register ShiftVal = MI.getOperand(1).getReg();
7382 Register ShiftReg = MI.getOperand(2).getReg();
7383 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
7384 auto IsShiftTooBig = [&](const Constant *C) {
7385 auto *CI = dyn_cast<ConstantInt>(C);
7386 if (!CI)
7387 return false;
7388 if (CI->uge(ResTy.getScalarSizeInBits())) {
7389 MatchInfo = std::nullopt;
7390 return true;
7391 }
7392 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7393 MI.getOpcode(), MatchInfo);
7394 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7395 };
7396 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7397}
7398
// Decides whether the two source operands of MI should be swapped so that an
// integer constant (or G_CONSTANT_FOLD_BARRIER) on the left ends up on the
// right. Returns true when a swap is wanted.
7400 unsigned LHSOpndIdx = 1;
7401 unsigned RHSOpndIdx = 2;
 // The overflow opcodes below use operands 2 and 3 as their sources instead
 // of 1 and 2.
7402 switch (MI.getOpcode()) {
7403 case TargetOpcode::G_UADDO:
7404 case TargetOpcode::G_SADDO:
7405 case TargetOpcode::G_UMULO:
7406 case TargetOpcode::G_SMULO:
7407 LHSOpndIdx = 2;
7408 RHSOpndIdx = 3;
7409 break;
7410 default:
7411 break;
7412 }
7413 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7414 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7415 if (!getIConstantVRegVal(LHS, MRI)) {
7416 // Skip commuting if LHS is not a constant. But, LHS may be a
7417 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7418 // have a constant on the RHS.
7419 if (MRI.getVRegDef(LHS)->getOpcode() !=
7420 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7421 return false;
7422 }
7423 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7424 return MRI.getVRegDef(RHS)->getOpcode() !=
7425 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7426 !getIConstantVRegVal(RHS, MRI);
7427}
7428
// Floating-point counterpart of the constant-to-RHS check: returns true when
// operand 1 matches m_GFCstOrSplat and operand 2 does not.
7430 Register LHS = MI.getOperand(1).getReg();
7431 Register RHS = MI.getOperand(2).getReg();
 // ValAndVReg only receives the matcher's capture; its value is not used.
7432 std::optional<FPValueAndVReg> ValAndVReg;
7433 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7434 return false;
7435 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7436}
7437
// Swaps the two source registers of MI in place. The mutation is bracketed by
// Observer.changingInstr / Observer.changedInstr.
7439 Observer.changingInstr(MI);
7440 unsigned LHSOpndIdx = 1;
7441 unsigned RHSOpndIdx = 2;
 // The overflow opcodes below use operands 2 and 3 as their sources instead
 // of 1 and 2.
7442 switch (MI.getOpcode()) {
7443 case TargetOpcode::G_UADDO:
7444 case TargetOpcode::G_SADDO:
7445 case TargetOpcode::G_UMULO:
7446 case TargetOpcode::G_SMULO:
7447 LHSOpndIdx = 2;
7448 RHSOpndIdx = 3;
7449 break;
7450 default:
7451 break;
7452 }
 // Read both registers before writing either one so the swap is lossless.
7453 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7454 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7455 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7456 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7457 Observer.changedInstr(MI);
7458}
7459
7460bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7461 LLT SrcTy = MRI.getType(Src);
7462 if (SrcTy.isFixedVector())
7463 return isConstantSplatVector(Src, 1, AllowUndefs);
7464 if (SrcTy.isScalar()) {
7465 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7466 return true;
7467 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7468 return IConstant && IConstant->Value == 1;
7469 }
7470 return false; // scalable vector
7471}
7472
7473bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7474 LLT SrcTy = MRI.getType(Src);
7475 if (SrcTy.isFixedVector())
7476 return isConstantSplatVector(Src, 0, AllowUndefs);
7477 if (SrcTy.isScalar()) {
7478 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7479 return true;
7480 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7481 return IConstant && IConstant->Value == 0;
7482 }
7483 return false; // scalable vector
7484}
7485
7486// Ignores COPYs during conformance checks.
7487// FIXME scalable vectors.
// Returns true when Src is defined by a build vector whose sources are all
// the integer constant SplatValue. With AllowUndefs set, implicit-def sources
// are skipped; without it, any implicit-def source makes the check fail.
// A register not defined by a build vector yields false.
7488bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7489 bool AllowUndefs) const {
7490 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7491 if (!BuildVector)
7492 return false;
7493 unsigned NumSources = BuildVector->getNumSources();
7494
7495 for (unsigned I = 0; I < NumSources; ++I) {
7496 GImplicitDef *ImplicitDef =
7498 if (ImplicitDef && AllowUndefs)
7499 continue;
7500 if (ImplicitDef && !AllowUndefs)
7501 return false;
7502 std::optional<ValueAndVReg> IConstant =
 // Every remaining source must be a constant equal to SplatValue.
7504 if (IConstant && IConstant->Value == SplatValue)
7505 continue;
7506 return false;
7507 }
 // NOTE(review): a build vector made only of implicit defs reaches this point
 // and is reported as a splat when AllowUndefs is set.
7508 return true;
7509}
7510
7511// Ignores COPYs during lookups.
7512// FIXME scalable vectors
// Returns the value of Src when it is an integer constant, or the common
// element value when Src is a build vector whose sources are all the same
// integer constant. Returns std::nullopt otherwise.
7513std::optional<APInt>
7514CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
 // Scalar constant: done.
7515 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7516 if (IConstant)
7517 return IConstant->Value;
7518
7519 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7520 if (!BuildVector)
7521 return std::nullopt;
7522 unsigned NumSources = BuildVector->getNumSources();
7523
 // Value holds the first element seen; every later element must equal it.
7524 std::optional<APInt> Value = std::nullopt;
7525 for (unsigned I = 0; I < NumSources; ++I) {
7526 std::optional<ValueAndVReg> IConstant =
7528 if (!IConstant)
7529 return std::nullopt;
7530 if (!Value)
7531 Value = IConstant->Value;
7532 else if (*Value != IConstant->Value)
7533 return std::nullopt;
7534 }
7535 return Value;
7536}
7537
7538// FIXME G_SPLAT_VECTOR
// Returns true when Src is an integer constant, or a build vector whose
// sources are all integer constants (the elements need not be equal).
7539bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7540 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7541 if (IConstant)
7542 return true;
7543
7544 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7545 if (!BuildVector)
7546 return false;
7547
 // One non-constant source is enough to reject the vector.
7548 unsigned NumSources = BuildVector->getNumSources();
7549 for (unsigned I = 0; I < NumSources; ++I) {
7550 std::optional<ValueAndVReg> IConstant =
7552 if (!IConstant)
7553 return false;
7554 }
7555 return true;
7556}
7557
7558// TODO: use knownbits to determine zeros
// Tries to replace a select between two integer constants, driven by a scalar
// s1 condition, with extension / not / add / shl / or arithmetic on the
// condition. On success MatchInfo receives the builder callback and true is
// returned. The patterns are tested in order, so the first match wins.
7559bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7560 BuildFnTy &MatchInfo) const {
7561 uint32_t Flags = Select->getFlags();
7562 Register Dest = Select->getReg(0);
7563 Register Cond = Select->getCondReg();
7564 Register True = Select->getTrueReg();
7565 Register False = Select->getFalseReg();
7566 LLT CondTy = MRI.getType(Select->getCondReg());
7567 LLT TrueTy = MRI.getType(Select->getTrueReg());
7568
7569 // We only do this combine for scalar boolean conditions.
7570 if (CondTy != LLT::scalar(1))
7571 return false;
7572
 // Pointer-typed selects are not folded.
7573 if (TrueTy.isPointer())
7574 return false;
7575
7576 // Both are scalars.
7577 std::optional<ValueAndVReg> TrueOpt =
7579 std::optional<ValueAndVReg> FalseOpt =
7581
 // Both arms must be known constants.
7582 if (!TrueOpt || !FalseOpt)
7583 return false;
7584
7585 APInt TrueValue = TrueOpt->Value;
7586 APInt FalseValue = FalseOpt->Value;
7587
7588 // select Cond, 1, 0 --> zext (Cond)
7589 if (TrueValue.isOne() && FalseValue.isZero()) {
7590 MatchInfo = [=](MachineIRBuilder &B) {
7591 B.setInstrAndDebugLoc(*Select);
7592 B.buildZExtOrTrunc(Dest, Cond);
7593 };
7594 return true;
7595 }
7596
7597 // select Cond, -1, 0 --> sext (Cond)
7598 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7599 MatchInfo = [=](MachineIRBuilder &B) {
7600 B.setInstrAndDebugLoc(*Select);
7601 B.buildSExtOrTrunc(Dest, Cond);
7602 };
7603 return true;
7604 }
7605
7606 // select Cond, 0, 1 --> zext (!Cond)
7607 if (TrueValue.isZero() && FalseValue.isOne()) {
7608 MatchInfo = [=](MachineIRBuilder &B) {
7609 B.setInstrAndDebugLoc(*Select);
7610 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7611 B.buildNot(Inner, Cond);
7612 B.buildZExtOrTrunc(Dest, Inner);
7613 };
7614 return true;
7615 }
7616
7617 // select Cond, 0, -1 --> sext (!Cond)
7618 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7619 MatchInfo = [=](MachineIRBuilder &B) {
7620 B.setInstrAndDebugLoc(*Select);
7621 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7622 B.buildNot(Inner, Cond);
7623 B.buildSExtOrTrunc(Dest, Inner);
7624 };
7625 return true;
7626 }
7627
7628 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
 // The existing False register (C1-1) is reused as the addend.
7629 if (TrueValue - 1 == FalseValue) {
7630 MatchInfo = [=](MachineIRBuilder &B) {
7631 B.setInstrAndDebugLoc(*Select);
7632 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7633 B.buildZExtOrTrunc(Inner, Cond);
7634 B.buildAdd(Dest, Inner, False);
7635 };
7636 return true;
7637 }
7638
7639 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
 // sext of a true condition is -1, so (C1+1) + (-1) gives C1.
7640 if (TrueValue + 1 == FalseValue) {
7641 MatchInfo = [=](MachineIRBuilder &B) {
7642 B.setInstrAndDebugLoc(*Select);
7643 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7644 B.buildSExtOrTrunc(Inner, Cond);
7645 B.buildAdd(Dest, Inner, False);
7646 };
7647 return true;
7648 }
7649
7650 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7651 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7652 MatchInfo = [=](MachineIRBuilder &B) {
7653 B.setInstrAndDebugLoc(*Select);
7654 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7655 B.buildZExtOrTrunc(Inner, Cond);
7656 // The shift amount must be scalar.
7657 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7658 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
 // The select's flags are forwarded to the shift.
7659 B.buildShl(Dest, Inner, ShAmtC, Flags);
7660 };
7661 return true;
7662 }
7663
7664 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7665 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7666 MatchInfo = [=](MachineIRBuilder &B) {
7667 B.setInstrAndDebugLoc(*Select);
7668 Register Not = MRI.createGenericVirtualRegister(CondTy);
7669 B.buildNot(Not, Cond);
7670 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7671 B.buildZExtOrTrunc(Inner, Not);
7672 // The shift amount must be scalar.
7673 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7674 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7675 B.buildShl(Dest, Inner, ShAmtC, Flags);
7676 };
7677 return true;
7678 }
7679
7680 // select Cond, -1, C --> or (sext Cond), C
7681 if (TrueValue.isAllOnes()) {
7682 MatchInfo = [=](MachineIRBuilder &B) {
7683 B.setInstrAndDebugLoc(*Select);
7684 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7685 B.buildSExtOrTrunc(Inner, Cond);
7686 B.buildOr(Dest, Inner, False, Flags);
7687 };
7688 return true;
7689 }
7690
7691 // select Cond, C, -1 --> or (sext (not Cond)), C
7692 if (FalseValue.isAllOnes()) {
7693 MatchInfo = [=](MachineIRBuilder &B) {
7694 B.setInstrAndDebugLoc(*Select);
7695 Register Not = MRI.createGenericVirtualRegister(CondTy);
7696 B.buildNot(Not, Cond);
7697 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7698 B.buildSExtOrTrunc(Inner, Not);
7699 B.buildOr(Dest, Inner, True, Flags);
7700 };
7701 return true;
7702 }
7703
7704 return false;
7705}
7706
7707// TODO: use knownbits to determine zeros
7708bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7709 BuildFnTy &MatchInfo) const {
7710 uint32_t Flags = Select->getFlags();
7711 Register DstReg = Select->getReg(0);
7712 Register Cond = Select->getCondReg();
7713 Register True = Select->getTrueReg();
7714 Register False = Select->getFalseReg();
7715 LLT CondTy = MRI.getType(Select->getCondReg());
7716 LLT TrueTy = MRI.getType(Select->getTrueReg());
7717
7718 // Boolean or fixed vector of booleans.
7719 if (CondTy.isScalableVector() ||
7720 (CondTy.isFixedVector() &&
7721 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7722 CondTy.getScalarSizeInBits() != 1)
7723 return false;
7724
7725 if (CondTy != TrueTy)
7726 return false;
7727
7728 // select Cond, Cond, F --> or Cond, F
7729 // select Cond, 1, F --> or Cond, F
7730 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7731 MatchInfo = [=](MachineIRBuilder &B) {
7732 B.setInstrAndDebugLoc(*Select);
7733 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7734 B.buildZExtOrTrunc(Ext, Cond);
7735 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7736 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7737 };
7738 return true;
7739 }
7740
7741 // select Cond, T, Cond --> and Cond, T
7742 // select Cond, T, 0 --> and Cond, T
7743 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7744 MatchInfo = [=](MachineIRBuilder &B) {
7745 B.setInstrAndDebugLoc(*Select);
7746 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7747 B.buildZExtOrTrunc(Ext, Cond);
7748 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7749 B.buildAnd(DstReg, Ext, FreezeTrue);
7750 };
7751 return true;
7752 }
7753
7754 // select Cond, T, 1 --> or (not Cond), T
7755 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7756 MatchInfo = [=](MachineIRBuilder &B) {
7757 B.setInstrAndDebugLoc(*Select);
7758 // First the not.
7759 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7760 B.buildNot(Inner, Cond);
7761 // Then an ext to match the destination register.
7762 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7763 B.buildZExtOrTrunc(Ext, Inner);
7764 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7765 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7766 };
7767 return true;
7768 }
7769
7770 // select Cond, 0, F --> and (not Cond), F
7771 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7772 MatchInfo = [=](MachineIRBuilder &B) {
7773 B.setInstrAndDebugLoc(*Select);
7774 // First the not.
7775 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7776 B.buildNot(Inner, Cond);
7777 // Then an ext to match the destination register.
7778 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7779 B.buildZExtOrTrunc(Ext, Inner);
7780 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7781 B.buildAnd(DstReg, Ext, FreezeFalse);
7782 };
7783 return true;
7784 }
7785
7786 return false;
7787}
7788
7790 BuildFnTy &MatchInfo) const {
 // Matches `select (icmp Pred X, Y), X, Y` (or the swapped compare) and
 // replaces the select with the corresponding integer min/max. MO's register
 // must be defined by a G_SELECT whose condition is defined by a G_ICMP; both
 // cast<> calls below assert that.
7791 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7792 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7793
7794 Register DstReg = Select->getReg(0);
7795 Register True = Select->getTrueReg();
7796 Register False = Select->getFalseReg();
7797 LLT DstTy = MRI.getType(DstReg);
7798
 // Pointer and pointer-vector results are not turned into min/max.
7799 if (DstTy.isPointerOrPointerVector())
7800 return false;
7801
7802 // We want to fold the icmp and replace the select.
7803 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7804 return false;
7805
7806 CmpInst::Predicate Pred = Cmp->getCond();
7807 // We need a larger or smaller predicate for
7808 // canonicalization.
7809 if (CmpInst::isEquality(Pred))
7810 return false;
7811
7812 Register CmpLHS = Cmp->getLHSReg();
7813 Register CmpRHS = Cmp->getRHSReg();
7814
7815 // We can swap CmpLHS and CmpRHS for higher hitrate.
7816 if (True == CmpRHS && False == CmpLHS) {
7817 std::swap(CmpLHS, CmpRHS);
7818 Pred = CmpInst::getSwappedPredicate(Pred);
7819 }
7820
7821 // (icmp X, Y) ? X : Y -> integer minmax.
7822 // see matchSelectPattern in ValueTracking.
7823 // Legality between G_SELECT and integer minmax can differ.
7824 if (True != CmpLHS || False != CmpRHS)
7825 return false;
7826
 // Strict and non-strict predicates map to the same min/max opcode. Each
 // case checks legality of the target opcode before committing.
7827 switch (Pred) {
7828 case ICmpInst::ICMP_UGT:
7829 case ICmpInst::ICMP_UGE: {
7830 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7831 return false;
7832 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7833 return true;
7834 }
7835 case ICmpInst::ICMP_SGT:
7836 case ICmpInst::ICMP_SGE: {
7837 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7838 return false;
7839 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7840 return true;
7841 }
7842 case ICmpInst::ICMP_ULT:
7843 case ICmpInst::ICMP_ULE: {
7844 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7845 return false;
7846 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7847 return true;
7848 }
7849 case ICmpInst::ICMP_SLT:
7850 case ICmpInst::ICMP_SLE: {
7851 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7852 return false;
7853 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7854 return true;
7855 }
7856 default:
7857 return false;
7858 }
7859}
7860
7861// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
7863 BuildFnTy &MatchInfo) const {
 // MI must be a G_SUB (asserted). The negation is matched on its result.
7864 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7865 Register DestReg = MI.getOperand(0).getReg();
7866 LLT DestTy = MRI.getType(DestReg);
7867
 // X is the common operand; Sub0 captures the register holding (neg X).
7868 Register X;
7869 Register Sub0;
7870 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
 // The min/max must have a single use (m_OneUse) and may be any of the four
 // signed/unsigned min/max flavours.
7871 if (mi_match(DestReg, MRI,
7872 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7873 m_GSMax(m_Reg(X), NegPattern),
7874 m_GUMin(m_Reg(X), NegPattern),
7875 m_GUMax(m_Reg(X), NegPattern)))))) {
 // Operand 2 of the G_SUB is the negated value, i.e. the min/max itself.
7876 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7877 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
 // NOTE(review): this uses isLegal, not isLegalOrBeforeLegalizer like the
 // neighbouring matchers -- confirm that is intended.
7878 if (isLegal({NewOpc, {DestTy}})) {
7879 MatchInfo = [=](MachineIRBuilder &B) {
7880 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7881 };
7882 return true;
7883 }
7884 }
7885
7886 return false;
7887}
7888
7891
 // Try the select folds in order; the first one that fills MatchInfo wins.
7892 if (tryFoldSelectOfConstants(Select, MatchInfo))
7893 return true;
7894
7895 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7896 return true;
7897
7898 return false;
7899}
7900
7901/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7902/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7903/// into a single comparison using range-based reasoning.
7904/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
7905bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7906 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7907 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7908 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7909 Register DstReg = Logic->getReg(0);
7910 Register LHS = Logic->getLHSReg();
7911 Register RHS = Logic->getRHSReg();
7912 unsigned Flags = Logic->getFlags();
7913
7914 // We need a G_ICMP on the LHS register.
7915 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7916 if (!Cmp1)
7917 return false;
7918
7919 // We need a G_ICMP on the RHS register.
7920 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7921 if (!Cmp2)
7922 return false;
7923
7924 // We want to fold the icmps.
7925 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7926 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7927 return false;
7928
 // Both compares must compare against a known constant.
7929 APInt C1;
7930 APInt C2;
7931 std::optional<ValueAndVReg> MaybeC1 =
7933 if (!MaybeC1)
7934 return false;
7935 C1 = MaybeC1->Value;
7936
7937 std::optional<ValueAndVReg> MaybeC2 =
7939 if (!MaybeC2)
7940 return false;
7941 C2 = MaybeC2->Value;
7942
7943 Register R1 = Cmp1->getLHSReg();
7944 Register R2 = Cmp2->getLHSReg();
7945 CmpInst::Predicate Pred1 = Cmp1->getCond();
7946 CmpInst::Predicate Pred2 = Cmp2->getCond();
7947 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7948 LLT CmpOperandTy = MRI.getType(R1);
7949
 // Pointer compares are not handled.
7950 if (CmpOperandTy.isPointer())
7951 return false;
7952
7953 // We build ands, adds, and constants of type CmpOperandTy.
7954 // They must be legal to build.
7955 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7956 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7957 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7958 return false;
7959
7960 // Look through add of a constant offset on R1, R2, or both operands. This
7961 // allows us to interpret the R + C' < C'' range idiom into a proper range.
7962 std::optional<APInt> Offset1;
7963 std::optional<APInt> Offset2;
7964 if (R1 != R2) {
7965 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7966 std::optional<ValueAndVReg> MaybeOffset1 =
7968 if (MaybeOffset1) {
7969 R1 = Add->getLHSReg();
7970 Offset1 = MaybeOffset1->Value;
7971 }
7972 }
7973 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7974 std::optional<ValueAndVReg> MaybeOffset2 =
7976 if (MaybeOffset2) {
7977 R2 = Add->getLHSReg();
7978 Offset2 = MaybeOffset2->Value;
7979 }
7980 }
7981 }
7982
 // After looking through the adds both compares must test the same register.
7983 if (R1 != R2)
7984 return false;
7985
7986 // We calculate the icmp ranges including maybe offsets.
 // For an AND the ranges are built from the inverted predicates, unioned, and
 // the union is inverted again further down.
7987 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7988 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7989 if (Offset1)
7990 CR1 = CR1.subtract(*Offset1);
7991
7992 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7993 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7994 if (Offset2)
7995 CR2 = CR2.subtract(*Offset2);
7996
7997 bool CreateMask = false;
7998 APInt LowerDiff;
7999 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
8000 if (!CR) {
8001 // We need non-wrapping ranges.
8002 if (CR1.isWrappedSet() || CR2.isWrappedSet())
8003 return false;
8004
8005 // Check whether we have equal-size ranges that only differ by one bit.
8006 // In that case we can apply a mask to map one range onto the other.
8007 LowerDiff = CR1.getLower() ^ CR2.getLower();
8008 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
8009 APInt CR1Size = CR1.getUpper() - CR1.getLower();
8010 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
8011 CR1Size != CR2.getUpper() - CR2.getLower())
8012 return false;
8013
 // Keep the range with the smaller lower bound; the mask maps the other one
 // onto it.
8014 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
8015 CreateMask = true;
8016 }
8017
8018 if (IsAnd)
8019 CR = CR->inverse();
8020
 // Express the final range as `icmp NewPred (X + Offset), NewC`.
8021 CmpInst::Predicate NewPred;
8022 APInt NewC, Offset;
8023 CR->getEquivalentICmp(NewPred, NewC, Offset);
8024
8025 // We take the result type of one of the original icmps, CmpTy, for
8026 // the icmp that is to be built. The operand type, CmpOperandTy, is used
8027 // for the other instructions and constants to be built. The types of
8028 // the parameters and output are the same for add and and. CmpTy
8029 // and the type of DstReg might differ. That is why we zext or trunc
8030 // the icmp into the destination register.
8031
 // Four shapes are emitted, depending on whether a mask and/or an offset is
 // required.
8032 MatchInfo = [=](MachineIRBuilder &B) {
8033 if (CreateMask && Offset != 0) {
8034 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
8035 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
8036 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
8037 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
8038 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8039 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
8040 B.buildZExtOrTrunc(DstReg, ICmp);
8041 } else if (CreateMask && Offset == 0) {
8042 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
8043 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
8044 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8045 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
8046 B.buildZExtOrTrunc(DstReg, ICmp);
8047 } else if (!CreateMask && Offset != 0) {
8048 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
8049 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
8050 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8051 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
8052 B.buildZExtOrTrunc(DstReg, ICmp);
8053 } else if (!CreateMask && Offset == 0) {
8054 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
8055 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
8056 B.buildZExtOrTrunc(DstReg, ICmp);
8057 } else {
8058 llvm_unreachable("unexpected configuration of CreateMask and Offset");
8059 }
8060 };
8061 return true;
8062}
8063
// Folds an and/or of two G_FCMPs over the same pair of operands (possibly
// swapped) into a single G_FCMP, or into a constant when the combined
// predicate is FCMP_FALSE / FCMP_TRUE. On success MatchInfo receives the
// builder callback and true is returned.
8064bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
8065 BuildFnTy &MatchInfo) const {
8066 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpecte xor");
8067 Register DestReg = Logic->getReg(0);
8068 Register LHS = Logic->getLHSReg();
8069 Register RHS = Logic->getRHSReg();
8070 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
8071
8072 // We need a compare on the LHS register.
8073 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
8074 if (!Cmp1)
8075 return false;
8076
8077 // We need a compare on the RHS register.
8078 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
8079 if (!Cmp2)
8080 return false;
8081
8082 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
8083 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
8084
8085 // We build one fcmp, want to fold the fcmps, replace the logic op,
8086 // and the fcmps must have the same shape.
8088 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
8089 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
8090 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
8091 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
8092 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
8093 return false;
8094
8095 CmpInst::Predicate PredL = Cmp1->getCond();
8096 CmpInst::Predicate PredR = Cmp2->getCond();
8097 Register LHS0 = Cmp1->getLHSReg();
8098 Register LHS1 = Cmp1->getRHSReg();
8099 Register RHS0 = Cmp2->getLHSReg();
8100 Register RHS1 = Cmp2->getRHSReg();
8101
8102 if (LHS0 == RHS1 && LHS1 == RHS0) {
8103 // Swap RHS operands to match LHS.
8104 PredR = CmpInst::getSwappedPredicate(PredR);
8105 std::swap(RHS0, RHS1);
8106 }
8107
8108 if (LHS0 == RHS0 && LHS1 == RHS1) {
8109 // We determine the new predicate.
 // The predicate codes are combined bitwise: AND for G_AND, OR for G_OR.
8110 unsigned CmpCodeL = getFCmpCode(PredL);
8111 unsigned CmpCodeR = getFCmpCode(PredR);
8112 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
 // The new compare carries the union of both compares' flags.
8113 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
8114 MatchInfo = [=](MachineIRBuilder &B) {
8115 // The fcmp predicates fill the lower part of the enum.
8116 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
8117 if (Pred == FCmpInst::FCMP_FALSE &&
8119 auto False = B.buildConstant(CmpTy, 0);
8120 B.buildZExtOrTrunc(DestReg, False);
8121 } else if (Pred == FCmpInst::FCMP_TRUE &&
8123 auto True =
8124 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
8125 CmpTy.isVector() /*isVector*/,
8126 true /*isFP*/));
8127 B.buildZExtOrTrunc(DestReg, True);
8128 } else { // We take the predicate without predicate optimizations.
8129 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
8130 B.buildZExtOrTrunc(DestReg, Cmp);
8131 }
8132 };
8133 return true;
8134 }
8135
8136 return false;
8137}
8138
// MI must be a G_AND (cast<> asserts). Tries the compare folds in order and
// returns true as soon as one of them fills MatchInfo.
8140 GAnd *And = cast<GAnd>(&MI);
8141
8142 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
8143 return true;
8144
8145 if (tryFoldLogicOfFCmps(And, MatchInfo))
8146 return true;
8147
8148 return false;
8149}
8150
// MI must be a G_OR (cast<> asserts). Tries the compare folds in order and
// returns true as soon as one of them fills MatchInfo.
8152 GOr *Or = cast<GOr>(&MI);
8153
8154 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
8155 return true;
8156
8157 if (tryFoldLogicOfFCmps(Or, MatchInfo))
8158 return true;
8159
8160 return false;
8161}
8162
8164 BuildFnTy &MatchInfo) const {
 // Simplifies an add-with-overflow (signed or unsigned, see IsSigned). The
 // folds are tried in order: dead carry, constant canonicalization, constant
 // folding, add of zero, re-association with an inner nuw/nsw add, and
 // finally proving (non-)overflow from known bits / sign bits.
8166
8167 // Addo has no flags
 // Result 0 is the sum, result 1 is the carry/overflow bit.
8168 Register Dst = Add->getReg(0);
8169 Register Carry = Add->getReg(1);
8170 Register LHS = Add->getLHSReg();
8171 Register RHS = Add->getRHSReg();
8172 bool IsSigned = Add->isSigned();
8173 LLT DstTy = MRI.getType(Dst);
8174 LLT CarryTy = MRI.getType(Carry);
8175
8176 // Fold addo, if the carry is dead -> add, undef.
8177 if (MRI.use_nodbg_empty(Carry) &&
8178 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
8179 MatchInfo = [=](MachineIRBuilder &B) {
8180 B.buildAdd(Dst, LHS, RHS);
8181 B.buildUndef(Carry);
8182 };
8183 return true;
8184 }
8185
8186 // Canonicalize constant to RHS.
8187 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
8188 if (IsSigned) {
8189 MatchInfo = [=](MachineIRBuilder &B) {
8190 B.buildSAddo(Dst, Carry, RHS, LHS);
8191 };
8192 return true;
8193 }
8194 // !IsSigned
8195 MatchInfo = [=](MachineIRBuilder &B) {
8196 B.buildUAddo(Dst, Carry, RHS, LHS);
8197 };
8198 return true;
8199 }
8200
8201 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
8202 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
8203
8204 // Fold addo(c1, c2) -> c3, carry.
8205 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
8207 bool Overflow;
8208 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
8209 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
8210 MatchInfo = [=](MachineIRBuilder &B) {
8211 B.buildConstant(Dst, Result);
8212 B.buildConstant(Carry, Overflow);
8213 };
8214 return true;
8215 }
8216
8217 // Fold (addo x, 0) -> x, no carry
8218 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
8219 MatchInfo = [=](MachineIRBuilder &B) {
8220 B.buildCopy(Dst, LHS);
8221 B.buildConstant(Carry, 0);
8222 };
8223 return true;
8224 }
8225
8226 // Given 2 constant operands whose sum does not overflow:
8227 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
8228 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
 // The inner add must carry the wrap flag matching the signedness of the addo.
8229 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
8230 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
8231 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
8232 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
8233 std::optional<APInt> MaybeAddRHS =
8234 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
8235 if (MaybeAddRHS) {
8236 bool Overflow;
8237 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
8238 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
8239 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
8240 if (IsSigned) {
8241 MatchInfo = [=](MachineIRBuilder &B) {
8242 auto ConstRHS = B.buildConstant(DstTy, NewC);
8243 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8244 };
8245 return true;
8246 }
8247 // !IsSigned
8248 MatchInfo = [=](MachineIRBuilder &B) {
8249 auto ConstRHS = B.buildConstant(DstTy, NewC);
8250 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8251 };
8252 return true;
8253 }
8254 }
8255 };
8256
8257 // We try to combine addo to non-overflowing add.
8258 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
8260 return false;
8261
8262 // We try to combine uaddo to non-overflowing add.
8263 if (!IsSigned) {
 // Derive unsigned value ranges from the known bits of both operands.
8264 ConstantRange CRLHS =
8265 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
8266 ConstantRange CRRHS =
8267 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
8268
8269 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
8271 return false;
8273 MatchInfo = [=](MachineIRBuilder &B) {
8274 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8275 B.buildConstant(Carry, 0);
8276 };
8277 return true;
8278 }
8281 MatchInfo = [=](MachineIRBuilder &B) {
8282 B.buildAdd(Dst, LHS, RHS);
8283 B.buildConstant(Carry, 1);
8284 };
8285 return true;
8286 }
8287 }
8288 return false;
8289 }
8290
8291 // We try to combine saddo to non-overflowing add.
8292
8293 // If LHS and RHS each have at least two sign bits, then there is no signed
8294 // overflow.
8295 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
8296 MatchInfo = [=](MachineIRBuilder &B) {
8297 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8298 B.buildConstant(Carry, 0);
8299 };
8300 return true;
8301 }
8302
 // Otherwise fall back to signed value ranges derived from known bits.
8303 ConstantRange CRLHS =
8304 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
8305 ConstantRange CRRHS =
8306 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
8307
8308 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
8310 return false;
8312 MatchInfo = [=](MachineIRBuilder &B) {
8313 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8314 B.buildConstant(Carry, 0);
8315 };
8316 return true;
8317 }
8320 MatchInfo = [=](MachineIRBuilder &B) {
8321 B.buildAdd(Dst, LHS, RHS);
8322 B.buildConstant(Carry, 1);
8323 };
8324 return true;
8325 }
8326 }
8327
8328 return false;
8329}
8330
8332 BuildFnTy &MatchInfo) const {
 // Runs the recorded build callback with this helper's Builder, then erases
 // the Root instruction.
8334 MatchInfo(Builder);
8335 Root->eraseFromParent();
8336}
8337
8339 int64_t Exponent) const {
 // Reads whether the enclosing function is marked optimize-for-size.
8340 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8342}
8343
8345 int64_t Exponent) const {
 // Expands a power with a constant integer exponent into a chain of G_FMULs
 // (plus one G_FDIV for negative exponents) and removes MI.
8346 auto [Dst, Base] = MI.getFirst2Regs();
8347 LLT Ty = MRI.getType(Dst);
8348 int64_t ExpVal = Exponent;
8349
 // x^0 is 1.0.
 // NOTE(review): this path uses removeFromParent() while the normal path at
 // the end uses eraseFromParent() -- confirm the difference is intended.
8350 if (ExpVal == 0) {
8351 Builder.buildFConstant(Dst, 1.0);
8352 MI.removeFromParent();
8353 return;
8354 }
8355
 // Work with the magnitude; the sign is handled after the loop.
 // NOTE(review): -ExpVal overflows for INT64_MIN -- confirm callers never
 // pass it.
8356 if (ExpVal < 0)
8357 ExpVal = -ExpVal;
8358
8359 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8360 // to generate the multiply sequence. There are more optimal ways to do this
8361 // (for example, powi(x,15) generates one more multiply than it should), but
8362 // this has the benefit of being both really simple and much better than a
8363 // libcall.
 // Res accumulates the product for the set bits of ExpVal; CurSquare holds
 // Base^(2^k) for the bit currently examined.
8364 std::optional<SrcOp> Res;
8365 SrcOp CurSquare = Base;
8366 while (ExpVal > 0) {
8367 if (ExpVal & 1) {
8368 if (!Res)
8369 Res = CurSquare;
8370 else
8371 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8372 }
8373
 // The square is also built on the last iteration, where it is never used.
8374 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8375 ExpVal >>= 1;
8376 }
8377
8378 // If the original exponent was negative, invert the result, producing
8379 // 1/(x*x*x).
 // Only this division receives MI's flags; the multiplies above do not.
8380 if (Exponent < 0)
8381 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8382 MI.getFlags());
8383
8384 Builder.buildCopy(Dst, *Res);
8385 MI.eraseFromParent();
8386}
8387
8389 BuildFnTy &MatchInfo) const {
8390 // fold (A+C1)-C2 -> A+(C1-C2)
 // MI must be a G_SUB whose LHS is defined by a G_ADD (both cast<> calls
 // assert).
8391 const GSub *Sub = cast<GSub>(&MI);
8392 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8393
 // Only fold when the inner add has no other non-debug user.
8394 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8395 return false;
8396
 // NOTE(review): presumably the caller has already matched both registers as
 // integer constants -- confirm against the combine rule.
8397 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8398 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8399
8400 Register Dst = Sub->getReg(0);
8401 LLT DstTy = MRI.getType(Dst);
8402
 // The lambda captures the Add pointer and reads its LHS at apply time.
8403 MatchInfo = [=](MachineIRBuilder &B) {
8404 auto Const = B.buildConstant(DstTy, C1 - C2);
8405 B.buildAdd(Dst, Add->getLHSReg(), Const);
8406 };
8407
8408 return true;
8409}
8410
8412 BuildFnTy &MatchInfo) const {
8413 // fold C2-(A+C1) -> (C2-C1)-A
 // MI must be a G_SUB whose RHS is defined by a G_ADD (both cast<> calls
 // assert).
8414 const GSub *Sub = cast<GSub>(&MI);
8415 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8416
 // Only fold when the inner add has no other non-debug user.
8417 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8418 return false;
8419
 // NOTE(review): presumably the caller has already matched both registers as
 // integer constants -- confirm against the combine rule.
8420 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8421 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8422
8423 Register Dst = Sub->getReg(0);
8424 LLT DstTy = MRI.getType(Dst);
8425
 // The lambda captures the Add pointer and reads its LHS at apply time.
8426 MatchInfo = [=](MachineIRBuilder &B) {
8427 auto Const = B.buildConstant(DstTy, C2 - C1);
8428 B.buildSub(Dst, Const, Add->getLHSReg());
8429 };
8430
8431 return true;
8432}
8433
8435 BuildFnTy &MatchInfo) const {
// NOTE(review): the first line of this definition (return type, name and the
// leading parameter) is absent from this listing; only the tail of the
// declarator is visible above.
8436 // fold (A-C1)-C2 -> A-(C1+C2)
// Sub1 is the outer subtraction (MI itself); Sub2 is whatever defines its LHS.
// cast<> rather than dyn_cast<> is used, and no null check follows.
8437 const GSub *Sub1 = cast<GSub>(&MI);
8438 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8439
// Reject the match when the inner result has any other non-debug use.
8440 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8441 return false;
8442
// C2 is read from the outer RHS, C1 from the inner RHS.
8443 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8444 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8445
8446 Register Dst = Sub1->getReg(0);
8447 LLT DstTy = MRI.getType(Dst);
8448
// Deferred rewrite: build the constant C1 + C2 and emit Dst = A - (C1 + C2).
// Sub2 is captured as a pointer and dereferenced when the callback runs.
8449 MatchInfo = [=](MachineIRBuilder &B) {
8450 auto Const = B.buildConstant(DstTy, C1 + C2);
8451 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8452 };
8453
8454 return true;
8455}
8456
8458 BuildFnTy &MatchInfo) const {
// NOTE(review): the first line of this definition (return type, name and the
// leading parameter) is absent from this listing; only the tail of the
// declarator is visible above.
8459 // fold (C1-A)-C2 -> (C1-C2)-A
// Sub1 is the outer subtraction (MI itself); Sub2 is whatever defines its LHS.
// cast<> rather than dyn_cast<> is used, and no null check follows.
8460 const GSub *Sub1 = cast<GSub>(&MI);
8461 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8462
// Reject the match when the inner result has any other non-debug use.
8463 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8464 return false;
8465
// C2 is read from the outer RHS; C1 is the LHS of the inner subtraction.
8466 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8467 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8468
8469 Register Dst = Sub1->getReg(0);
8470 LLT DstTy = MRI.getType(Dst);
8471
// Deferred rewrite: build the constant C1 - C2 and emit Dst = (C1 - C2) - A,
// where A is the RHS of the inner subtraction. Sub2 is captured as a pointer
// and dereferenced when the callback runs.
8472 MatchInfo = [=](MachineIRBuilder &B) {
8473 auto Const = B.buildConstant(DstTy, C1 - C2);
8474 B.buildSub(Dst, Const, Sub2->getRHSReg());
8475 };
8476
8477 return true;
8478}
8479
8481 BuildFnTy &MatchInfo) const {
8482 // fold ((A-C1)+C2) -> (A+(C2-C1))
8483 const GAdd *Add = cast<GAdd>(&MI);
8484 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8485
8486 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8487 return false;
8488
8489 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8490 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8491
8492 Register Dst = Add->getReg(0);
8493 LLT DstTy = MRI.getType(Dst);
8494
8495 MatchInfo = [=](MachineIRBuilder &B) {
8496 auto Const = B.buildConstant(DstTy, C2 - C1);
8497 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8498 };
8499
8500 return true;
8501}
8502
8504 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
8505 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8506
8507 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8508 return false;
8509
8510 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8511
8512 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8513
8514 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8515 // $any:_(<8 x s16>) = G_ANYEXT $bv
8516 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8517 //
8518 // ->
8519 //
8520 // $any:_(s16) = G_ANYEXT $bv[0]
8521 // $any1:_(s16) = G_ANYEXT $bv[1]
8522 // $any2:_(s16) = G_ANYEXT $bv[2]
8523 // $any3:_(s16) = G_ANYEXT $bv[3]
8524 // $any4:_(s16) = G_ANYEXT $bv[4]
8525 // $any5:_(s16) = G_ANYEXT $bv[5]
8526 // $any6:_(s16) = G_ANYEXT $bv[6]
8527 // $any7:_(s16) = G_ANYEXT $bv[7]
8528 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8529 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8530
8531 // We want to unmerge into vectors.
8532 if (!DstTy.isFixedVector())
8533 return false;
8534
8535 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8536 if (!Any)
8537 return false;
8538
8539 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8540
8541 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8542 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8543
8544 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8545 return false;
8546
8547 // FIXME: check element types?
8548 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8549 return false;
8550
8551 LLT BigBvTy = MRI.getType(BV->getReg(0));
8552 LLT SmallBvTy = DstTy;
8553 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8554
8556 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8557 return false;
8558
8559 // We check the legality of scalar anyext.
8561 {TargetOpcode::G_ANYEXT,
8562 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8563 return false;
8564
8565 MatchInfo = [=](MachineIRBuilder &B) {
8566 // Build into each G_UNMERGE_VALUES def
8567 // a small build vector with anyext from the source build vector.
8568 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
8570 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
8571 Register SourceArray =
8572 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8573 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8574 Ops.push_back(AnyExt.getReg(0));
8575 }
8576 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8577 };
8578 };
8579 return true;
8580 };
8581
8582 return false;
8583}
8584
8586 BuildFnTy &MatchInfo) const {
// NOTE(review): the first line of this definition (return type, name and the
// leading parameter) is absent from this listing; only the tail of the
// declarator is visible above.
8587
// Changed records whether at least one mask entry had to be rewritten.
8588 bool Changed = false;
8589 auto &Shuffle = cast<GShuffleVector>(MI);
8590 ArrayRef<int> OrigMask = Shuffle.getMask();
8591 SmallVector<int, 16> NewMask;
8592 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
// A non-vector first source is treated as having a single element.
8593 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8594 const unsigned NumDstElts = OrigMask.size();
// Copy the mask, replacing every index at or beyond NumSrcElems with -1.
8595 for (unsigned i = 0; i != NumDstElts; ++i) {
8596 int Idx = OrigMask[i];
8597 if (Idx >= (int)NumSrcElems) {
8598 Idx = -1;
8599 Changed = true;
8600 }
8601 NewMask.push_back(Idx);
8602 }
8603
// Nothing was rewritten, so there is nothing to do.
8604 if (!Changed)
8605 return false;
8606
// Deferred rewrite: rebuild the shuffle from MI's first three operands with
// the new mask. MI is captured by reference through the default capture.
// The lambda is not mutable, so the inner std::move acts on a const capture.
8607 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8608 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8609 std::move(NewMask));
8610 };
8611
8612 return true;
8613}
8614
8615static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8616 const unsigned MaskSize = Mask.size();
8617 for (unsigned I = 0; I < MaskSize; ++I) {
8618 int Idx = Mask[I];
8619 if (Idx < 0)
8620 continue;
8621
8622 if (Idx < (int)NumElems)
8623 Mask[I] = Idx + NumElems;
8624 else
8625 Mask[I] = Idx - NumElems;
8626 }
8627}
8628
8630 BuildFnTy &MatchInfo) const {
// NOTE(review): the first line of this definition (return type, name and the
// leading parameter) is absent from this listing; only the tail of the
// declarator is visible above.
8631
8632 auto &Shuffle = cast<GShuffleVector>(MI);
8633 // If any of the two inputs is already undef, don't check the mask again to
8634 // prevent infinite loop
8635 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8636 return false;
8637
8638 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8639 return false;
8640
8641 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8642 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
// NOTE(review): the line that opens this legality condition is absent from
// the listing; only its argument and the early return are visible.
8644 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8645 return false;
8646
8647 ArrayRef<int> Mask = Shuffle.getMask();
8648 const unsigned NumSrcElems = Src1Ty.getNumElements();
8649
// Classify every non-negative mask entry: below NumSrcElems counts as the
// first source, anything else as the second. Negative entries are ignored.
8650 bool TouchesSrc1 = false;
8651 bool TouchesSrc2 = false;
8652 const unsigned NumElems = Mask.size();
8653 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8654 if (Mask[Idx] < 0)
8655 continue;
8656
8657 if (Mask[Idx] < (int)NumSrcElems)
8658 TouchesSrc1 = true;
8659 else
8660 TouchesSrc2 = true;
8661 }
8662
// Proceed only when exactly one of the two sources is referenced.
8663 if (TouchesSrc1 == TouchesSrc2)
8664 return false;
8665
// When only the second source is used, make it the first operand and remap
// the mask with commuteMask so the indices refer to the new operand order.
8666 Register NewSrc1 = Shuffle.getSrc1Reg();
8667 SmallVector<int, 16> NewMask(Mask);
8668 if (TouchesSrc2) {
8669 NewSrc1 = Shuffle.getSrc2Reg();
8670 commuteMask(NewMask, NumSrcElems);
8671 }
8672
// Deferred rewrite: shuffle the surviving source against a fresh undef value
// of the source type. Shuffle is captured by reference, all else by copy.
8673 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8674 auto Undef = B.buildUndef(Src1Ty);
8675 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8676 };
8677
8678 return true;
8679}
8680
8682 BuildFnTy &MatchInfo) const {
// NOTE(review): the first line of this definition (return type, name and the
// leading parameter) is absent from this listing; only the tail of the
// declarator is visible above.
8683 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8684
8685 Register Dst = Subo->getReg(0);
8686 Register LHS = Subo->getLHSReg();
8687 Register RHS = Subo->getRHSReg();
8688 Register Carry = Subo->getCarryOutReg();
8689 LLT DstTy = MRI.getType(Dst);
8690 LLT CarryTy = MRI.getType(Carry);
8691
8692 // Check legality before known bits.
// NOTE(review): the second operand of this `||` is absent from the listing.
8693 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8695 return false;
8696
// Turn the known bits of both operands into value ranges, interpreted as
// signed or unsigned according to Subo->isSigned().
8697 ConstantRange KBLHS =
8698 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8699 /* IsSigned=*/Subo->isSigned());
8700 ConstantRange KBRHS =
8701 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8702 /* IsSigned=*/Subo->isSigned());
8703
8704 if (Subo->isSigned()) {
8705 // G_SSUBO
// NOTE(review): every `case` label of this switch is absent from the listing;
// the comments on the branches below describe only the visible bodies.
8706 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
8708 return false;
// This branch emits a G_SUB flagged NoSWrap and a carry constant of 0.
8710 MatchInfo = [=](MachineIRBuilder &B) {
8711 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8712 B.buildConstant(Carry, 0);
8713 };
8714 return true;
8715 }
// This branch emits an unflagged G_SUB and sets the carry to the value
// returned by getICmpTrueVal for CarryTy.
8718 MatchInfo = [=](MachineIRBuilder &B) {
8719 B.buildSub(Dst, LHS, RHS);
8720 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8721 /*isVector=*/CarryTy.isVector(),
8722 /*isFP=*/false));
8723 };
8724 return true;
8725 }
8726 }
8727 return false;
8728 }
8729
8730 // G_USUBO
// NOTE(review): the `case` labels of this switch are absent from the listing
// as well. The visible branches mirror the signed ones, with NoUWrap in place
// of NoSWrap.
8731 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8733 return false;
8735 MatchInfo = [=](MachineIRBuilder &B) {
8736 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8737 B.buildConstant(Carry, 0);
8738 };
8739 return true;
8740 }
8743 MatchInfo = [=](MachineIRBuilder &B) {
8744 B.buildSub(Dst, LHS, RHS);
8745 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8746 /*isVector=*/CarryTy.isVector(),
8747 /*isFP=*/false));
8748 };
8749 return true;
8750 }
8751 }
8752
8753 return false;
8754}
8755
8756// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
8757// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1)) -> (ctls x)
8759 BuildFnTy &MatchInfo) const {
// NOTE(review): the first line of this definition (return type, name and the
// leading parameter) is absent from this listing; only the tail of the
// declarator is visible above.
8760 assert((CtlzMI.getOpcode() == TargetOpcode::G_CTLZ ||
8761 CtlzMI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON) &&
8762 "Expected G_CTLZ variant");
8763
8764 const Register Dst = CtlzMI.getOperand(0).getReg();
8765 Register Src = CtlzMI.getOperand(1).getReg();
8766
8767 LLT Ty = MRI.getType(Dst);
8768 LLT SrcTy = MRI.getType(Src);
8769
// Only valid scalar result types are handled.
8770 if (!(Ty.isValid() && Ty.isScalar()))
8771 return false;
8772
// Without legalizer info the G_CTLS query below cannot be made.
8773 if (!LI)
8774 return false;
8775
8776 SmallVector<LLT, 2> QueryTypes = {Ty, SrcTy};
8777 LegalityQuery Query(TargetOpcode::G_CTLS, QueryTypes);
8778
// NOTE(review): the `case` labels naming the accepted actions are absent
// from the listing; only the rejecting default and the `break` are visible.
8779 switch (LI->getAction(Query).Action) {
8780 default:
8781 return false;
8785 break;
8786 }
8787
8788 // Src = or(shl(V, 1), 1) -> Src=V; NeedAdd = False
8789 Register V;
8790 bool NeedAdd = true;
// NOTE(review): the matcher pattern for this condition is only partly present
// in the listing.
8791 if (mi_match(Src, MRI,
8793 m_SpecificICst(1))))) {
8794 NeedAdd = false;
8795 Src = V;
8796 }
8797
8798 unsigned BitWidth = Ty.getScalarSizeInBits();
8799
8800 Register X;
// NOTE(review): the matcher pattern for this condition is only partly present
// in the listing; the visible tail compares a constant against BitWidth - 1.
8801 if (!mi_match(Src, MRI,
8804 m_SpecificICst(BitWidth - 1)))))))
8805 return false;
8806
// Deferred rewrite: emit G_CTLS of X straight into Dst when no adjustment is
// needed, otherwise emit G_CTLS into a temporary and add 1.
8807 MatchInfo = [=](MachineIRBuilder &B) {
8808 if (!NeedAdd) {
8809 B.buildCTLS(Dst, X);
8810 return;
8811 }
8812
8813 auto Ctls = B.buildCTLS(Ty, X);
8814 auto One = B.buildConstant(Ty, 1);
8815
8816 B.buildAdd(Dst, Ctls, One);
8817 };
8818
8819 return true;
8820}
8821
8822// Fold shr ( add ( ext X, ext Y ), 1 ) -> avgfloor ( x, y )
8823// Fold shr ( add ( ext X, ext Y, 1 ), 1 ) -> avgceil ( x, y )
8826 unsigned TargetOpc) const {
8827 assert((MI.getOpcode() == TargetOpcode::G_LSHR ||
8828 MI.getOpcode() == TargetOpcode::G_ASHR) &&
8829 "Expected G_LSHR/G_ASHR");
8830
8831 LLT XTy = MRI.getType(X);
8832 return XTy == MRI.getType(Y) && isLegal({TargetOpc, {XTy}});
8833}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
static constexpr roundingMode rmTowardZero
Definition APFloat.h:349
static const fltSemantics & IEEEdouble()
Definition APFloat.h:298
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:350
const fltSemantics & getSemantics() const
Definition APFloat.h:1552
bool isNaN() const
Definition APFloat.h:1542
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1300
APInt bitcastToAPInt() const
Definition APFloat.h:1436
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1692
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
int32_t exactLogBase2() const
Definition APInt.h:1806
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1084
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1300
bool isMask(unsigned numBits) const
Definition APInt.h:489
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:978
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:755
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:754
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:752
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:753
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:742
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
LLVM_ABI void applyCombineBuildVectorOfBitcast(MachineInstr &MI, SmallVector< Register > &Ops) const
LLVM_ABI void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
LLVM_ABI bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchRepeatedFPDivisor(MachineInstr &MI, SmallVector< MachineInstr * > &MatchInfo) const
LLVM_ABI bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
LLVM_ABI const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
LLVM_ABI void applyPtrAddZero(MachineInstr &MI) const
LLVM_ABI bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
LLVM_ABI void applyUDivOrURemByConst(MachineInstr &MI) const
LLVM_ABI bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
LLVM_ABI void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
LLVM_ABI bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
LLVM_ABI bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
LLVM_ABI bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchAVG(MachineInstr &MI, MachineRegisterInfo &MRI, Register X, Register Y, unsigned TargetOpc) const
LLVM_ABI bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
LLVM_ABI bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
LLVM_ABI bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
LLVM_ABI bool matchPtrAddZero(MachineInstr &MI) const
}
const TargetInstrInfo * TII
LLVM_ABI void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
LLVM_ABI void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
LLVM_ABI bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
LLVM_ABI bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
LLVM_ABI void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
LLVM_ABI bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
LLVM_ABI bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
LLVM_ABI bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
LLVM_ABI void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
LLVM_ABI void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
LLVM_ABI void applyCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
LLVM_ABI bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
LLVM_ABI void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
LLVM_ABI void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
LLVM_ABI void applyCombineMemCpyFamily(MachineInstr &MI, MemCpyFamilyLoweringInfo &MatchInfo) const
LLVM_ABI bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
LLVM_ABI void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
LLVM_ABI bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
LLVM_ABI const DataLayout & getDataLayout() const
LLVM_ABI bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
LLVM_ABI bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
LLVM_ABI bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
LLVM_ABI void applyUMulHToLShr(MachineInstr &MI) const
LLVM_ABI void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
LLVM_ABI bool isLegalOrHasFewerElements(const LegalityQuery &Query) const
LLVM_ABI bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
LLVM_ABI void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
LLVM_ABI bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
LLVM_ABI bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
LLVM_ABI bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
LLVM_ABI bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI const TargetLowering & getTargetLowering() const
LLVM_ABI bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
LLVM_ABI void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
LLVM_ABI void applySDivByPow2(MachineInstr &MI) const
LLVM_ABI void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
LLVM_ABI void applyUDivByPow2(MachineInstr &MI) const
Given a G_UDIV MI expressing an unsigned divide by a pow2 constant, return expressions that impleme...
LLVM_ABI bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
LLVM_ABI bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
LLVM_ABI bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
LLVM_ABI void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
LLVM_ABI bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
LLVM_ABI void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI's bitwidth.
LLVM_ABI void applyCombineCopy(MachineInstr &MI) const
LLVM_ABI bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
LLVM_ABI bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
LLVM_ABI void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
LLVM_ABI bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
LLVM_ABI bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
LLVM_ABI bool matchSextTruncSextLoad(MachineInstr &MI) const
LLVM_ABI bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
LLVM_ABI bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
LLVM_ABI bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) const
LLVM_ABI bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given a G_SDIV MI expressing a signed divide by a pow2 constant, return expressions that implements...
LLVM_ABI bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
LLVM_ABI bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
LLVM_ABI bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
LLVM_ABI bool matchCombineCopy(MachineInstr &MI) const
LLVM_ABI bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
LLVM_ABI void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
LLVM_ABI bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y).
LLVM_ABI bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
LLVM_ABI void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
LLVM_ABI bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
LLVM_ABI void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
LLVM_ABI bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
LLVM_ABI bool matchOrShiftToFunnelShift(MachineInstr &MI, bool AllowScalarConstants, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchRedundantSExtInReg(MachineInstr &MI) const
LLVM_ABI void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
LLVM_ABI void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
LLVM_ABI bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
LLVM_ABI void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
LLVM_ABI bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
LLVM_ABI bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
LLVM_ABI void applyCombineShuffleVector(MachineInstr &MI, ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
LLVM_ABI bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
LLVM_ABI bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
LLVM_ABI void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
LLVM_ABI bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
LLVM_ABI bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
LLVM_ABI bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
LLVM_ABI void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
LLVM_ABI bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
LLVM_ABI bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
LLVM_ABI bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
LLVM_ABI bool isPreLegalize() const
LLVM_ABI bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
LLVM_ABI bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
LLVM_ABI bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
LLVM_ABI bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
LLVM_ABI bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
LLVM_ABI bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
LLVM_ABI bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVM_ABI LLVMContext & getContext() const
LLVM_ABI void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
LLVM_ABI bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
LLVM_ABI bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
LLVM_ABI bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
LLVM_ABI bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
LLVM_ABI bool matchConstantFoldUnaryIntOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Constant fold a unary integer op (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON variants,...
LLVM_ABI void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
LLVM_ABI bool isLegal(const LegalityQuery &Query) const
LLVM_ABI bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
LLVM_ABI bool matchOperandIsKnownToBeAPowerOfTwo(const MachineOperand &MO, bool OrNegative=false) const
Check if operand MO is known to be a power of 2.
LLVM_ABI bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate the operands of a commutative binop.
LLVM_ABI void eraseInst(MachineInstr &MI) const
Erase MI.
LLVM_ABI bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
LLVM_ABI void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
LLVM_ABI bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
LLVM_ABI void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
LLVM_ABI void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
LLVM_ABI bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
LLVM_ABI MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given a G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
LLVM_ABI void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
LLVM_ABI bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
LLVM_ABI bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
LLVM_ABI bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is extend that consumes the result of a load, try to combine it.
LLVM_ABI bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
LLVM_ABI bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
LLVM_ABI void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
LLVM_ABI void applyRotateOutOfRange(MachineInstr &MI) const
LLVM_ABI bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
LLVM_ABI bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
LLVM_ABI bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
LLVM_ABI bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
LLVM_ABI bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
LLVM_ABI bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
LLVM_ABI bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
LLVM_ABI bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
LLVM_ABI void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
LLVM_ABI bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
LLVM_ABI bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
LLVM_ABI bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
LLVM_ABI bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI bool matchRotateOutOfRange(MachineInstr &MI) const
LLVM_ABI void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
LLVM_ABI void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
LLVM_ABI bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
LLVM_ABI bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
LLVM_ABI void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
LLVM_ABI bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
LLVM_ABI bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
LLVM_ABI void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI void applySimplifySRemByPow2(MachineInstr &MI) const
Combine G_SREM x, (+/-2^k) to a bias-and-mask sequence.
LLVM_ABI bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
LLVM_ABI bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
LLVM_ABI void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
LLVM_ABI bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
LLVM_ABI void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
LLVM_ABI void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &UnmergeSrc) const
LLVM_ABI bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
LLVM_ABI void applyFunnelShiftToRotate(MachineInstr &MI) const
LLVM_ABI bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI void applyRepeatedFPDivisor(SmallVector< MachineInstr * > &MatchInfo) const
LLVM_ABI bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
LLVM_ABI bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
LLVM_ABI bool matchBinopWithNeg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold a bitwiseop (~b +/- c) -> a bitwiseop ~(b -/+ c)
LLVM_ABI bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
LLVM_ABI void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
LLVM_ABI bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
LLVM_ABI bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
LLVM_ABI void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
LLVM_ABI bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
LLVM_ABI bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
LLVM_ABI bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
LLVM_ABI bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
LLVM_ABI bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
LLVM_ABI bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
LLVM_ABI bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
LLVM_ABI bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
LLVM_ABI void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
LLVM_ABI bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
LLVM_ABI bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI void applySDivOrSRemByConst(MachineInstr &MI) const
LLVM_ABI bool matchCombineMemCpyFamily(MachineInstr &MI, MemCpyFamilyLoweringInfo &MatchInfo, unsigned MaxLen=0) const
LLVM_ABI MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given a G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
LLVM_ABI bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
LLVM_ABI bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
LLVM_ABI bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
LLVM_ABI bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
LLVM_ABI void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
LLVM_ABI void applyCommuteBinOpOperands(MachineInstr &MI) const
LLVM_ABI void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
LLVM_ABI void applySextTruncSextLoad(MachineInstr &MI) const
LLVM_ABI const MachineFunction & getMachineFunction() const
LLVM_ABI bool matchCombineBuildVectorOfBitcast(MachineInstr &MI, SmallVector< Register > &Ops) const
Combine G_BUILD_VECTOR(G_UNMERGE(G_BITCAST), Undef) to G_BITCAST(G_BUILD_VECTOR(.....
LLVM_ABI bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
LLVM_ABI bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
LLVM_ABI void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
LLVM_ABI void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
LLVM_ABI bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
LLVM_ABI void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
LLVM_ABI bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
LLVM_ABI void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
LLVM_ABI bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
LLVM_ABI void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValue() const
Definition Constants.h:464
const APFloat & getValueAPF() const
Definition Constants.h:463
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:218
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
unsigned size() const
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:143
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:354
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, Register Dst, Register Src, uint64_t KnownLen, Align Alignment, bool DstAlignCanChange, ArrayRef< LLT > MemOps)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
static use_instr_nodbg_iterator use_instr_nodbg_end()
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(const APInt &RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1447
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:1987
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:656
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:464
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:297
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1407
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1572
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:744
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1530
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1554
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:497
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1587
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1619
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:675
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:308
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1510
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:203
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
std::tuple< Register, Register, uint64_t, Align, bool, std::vector< LLT > > MemCpyFamilyLoweringInfo
Definition Utils.h:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1440
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:911
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI bool canLowerMemCpyFamily(const MachineInstr &MI, const MachineRegisterInfo &MRI, unsigned MaxLen, Register &Dst, Register &Src, uint64_t &KnownLen, Align &Alignment, bool &DstAlignCanChange, std::vector< LLT > &MemOps)
Matcher for memcpy-like instructions.
Definition Utils.cpp:2153
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:282
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:450
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1543
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1644
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:436
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:472
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI SmallVector< APInt > ConstantFoldUnaryIntOp(unsigned Opcode, LLT DstTy, Register Src, const MachineRegisterInfo &MRI)
Tries to constant fold a unary integer operation (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON vari...
Definition Utils.cpp:948
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:504
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1425
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:242
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:265
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...