1//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
56CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
57 MachineIRBuilder &B, bool IsPreLegalize,
58 GISelKnownBits *KB, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI)
60 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
61 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
62 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
63 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
64 (void)this->KB;
65}
66
67const TargetLowering &CombinerHelper::getTargetLowering() const {
68 return *Builder.getMF().getSubtarget().getTargetLowering();
69}
70
71/// \returns The little endian in-memory byte position of byte \p I in a
72/// \p ByteWidth bytes wide type.
73///
74/// E.g. Given a 4-byte type x, x[0] -> byte 0
75static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
76 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
77 return I;
78}
79
80/// Determines the LogBase2 value for a non-null input value using the
81/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
82static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
83 auto &MRI = *MIB.getMRI();
84 LLT Ty = MRI.getType(V);
85 auto Ctlz = MIB.buildCTLZ(Ty, V);
86 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
87 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
88}
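// As a worked example (illustrative, not from a specific test): for a 32-bit
// power-of-two value V = 8, ctlz(8) = 28, so LogBase2(8) = (32 - 1) - 28 = 3.
// The emitted generic MIR is roughly:
//   %ctlz:_(s32) = G_CTLZ %v(s32)
//   %c31:_(s32) = G_CONSTANT i32 31
//   %log2:_(s32) = G_SUB %c31, %ctlz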
89
90/// \returns The big endian in-memory byte position of byte \p I in a
91/// \p ByteWidth bytes wide type.
92///
93/// E.g. Given a 4-byte type x, x[0] -> byte 3
94static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
95 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
96 return ByteWidth - I - 1;
97}
98
99/// Given a map from byte offsets in memory to indices in a load/store,
100/// determine if that map corresponds to a little or big endian byte pattern.
101///
102/// \param MemOffset2Idx maps memory offsets to address offsets.
103/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
104///
105/// \returns true if the map corresponds to a big endian byte pattern, false if
106/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
107///
108/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
109/// are as follows:
110///
111/// AddrOffset Little endian Big endian
112/// 0 0 3
113/// 1 1 2
114/// 2 2 1
115/// 3 3 0
116static std::optional<bool>
117isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
118 int64_t LowestIdx) {
119 // Need at least two byte positions to decide on endianness.
120 unsigned Width = MemOffset2Idx.size();
121 if (Width < 2)
122 return std::nullopt;
123 bool BigEndian = true, LittleEndian = true;
124 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
125 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
126 if (MemOffsetAndIdx == MemOffset2Idx.end())
127 return std::nullopt;
128 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
129 assert(Idx >= 0 && "Expected non-negative byte offset?");
130 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
131 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
132 if (!BigEndian && !LittleEndian)
133 return std::nullopt;
134 }
135
136 assert((BigEndian != LittleEndian) &&
137 "Pattern cannot be both big and little endian!");
138 return BigEndian;
139}
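// For instance (an illustrative reading of the table above): with Width == 4
// and LowestIdx == 0, the map {0->3, 1->2, 2->1, 3->0} matches the big-endian
// column and yields true, {0->0, 1->1, 2->2, 3->3} yields false (little
// endian), and anything else (or a map with a missing offset) yields
// std::nullopt.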
140
141bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
142
143bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
144 assert(LI && "Must have LegalizerInfo to query isLegal!");
145 return LI->getAction(Query).Action == LegalizeActions::Legal;
146}
147
148bool CombinerHelper::isLegalOrBeforeLegalizer(
149 const LegalityQuery &Query) const {
150 return isPreLegalize() || isLegal(Query);
151}
152
153bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
154 if (!Ty.isVector())
155 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
156 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
157 if (isPreLegalize())
158 return true;
159 LLT EltTy = Ty.getElementType();
160 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
161 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
162}
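// Usage sketch (hypothetical types): after legalization, querying the helper
// above with LLT::fixed_vector(4, 32) returns true only if the target marks
// both G_BUILD_VECTOR {v4s32, s32} and G_CONSTANT {s32} as Legal; before
// legalization it returns true unconditionally.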
163
164void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
165 Register ToReg) const {
166 Observer.changingAllUsesOfReg(MRI, FromReg);
167
168 if (MRI.constrainRegAttrs(ToReg, FromReg))
169 MRI.replaceRegWith(FromReg, ToReg);
170 else
171 Builder.buildCopy(ToReg, FromReg);
172
173 Observer.finishedChangingAllUsesOfReg();
174}
175
177 MachineOperand &FromRegOp,
178 Register ToReg) const {
179 assert(FromRegOp.getParent() && "Expected an operand in an MI");
180 Observer.changingInstr(*FromRegOp.getParent());
181
182 FromRegOp.setReg(ToReg);
183
184 Observer.changedInstr(*FromRegOp.getParent());
185}
186
188 unsigned ToOpcode) const {
189 Observer.changingInstr(FromMI);
190
191 FromMI.setDesc(Builder.getTII().get(ToOpcode));
192
193 Observer.changedInstr(FromMI);
194}
195
197 return RBI->getRegBank(Reg, MRI, *TRI);
198}
199
201 if (RegBank)
202 MRI.setRegBank(Reg, *RegBank);
203}
204
205bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
206 if (matchCombineCopy(MI)) {
207 applyCombineCopy(MI);
208 return true;
209 }
210 return false;
211}
212bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
213 if (MI.getOpcode() != TargetOpcode::COPY)
214 return false;
215 Register DstReg = MI.getOperand(0).getReg();
216 Register SrcReg = MI.getOperand(1).getReg();
217 return canReplaceReg(DstReg, SrcReg, MRI);
218}
219void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
220 Register DstReg = MI.getOperand(0).getReg();
221 Register SrcReg = MI.getOperand(1).getReg();
222 MI.eraseFromParent();
223 replaceRegWith(MRI, DstReg, SrcReg);
224}
225
228 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
229 "Invalid instruction");
230 bool IsUndef = true;
231 MachineInstr *Undef = nullptr;
232
233 // Walk over all the operands of concat vectors and check if they are
234 // build_vector themselves or undef.
235 // Then collect their operands in Ops.
236 for (const MachineOperand &MO : MI.uses()) {
237 Register Reg = MO.getReg();
238 MachineInstr *Def = MRI.getVRegDef(Reg);
239 assert(Def && "Operand not defined");
240 if (!MRI.hasOneNonDBGUse(Reg))
241 return false;
242 switch (Def->getOpcode()) {
243 case TargetOpcode::G_BUILD_VECTOR:
244 IsUndef = false;
245 // Remember the operands of the build_vector to fold
246 // them into the yet-to-build flattened concat vectors.
247 for (const MachineOperand &BuildVecMO : Def->uses())
248 Ops.push_back(BuildVecMO.getReg());
249 break;
250 case TargetOpcode::G_IMPLICIT_DEF: {
251 LLT OpType = MRI.getType(Reg);
252 // Keep one undef value for all the undef operands.
253 if (!Undef) {
254 Builder.setInsertPt(*MI.getParent(), MI);
255 Undef = Builder.buildUndef(OpType.getScalarType());
256 }
257 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
258 OpType.getScalarType() &&
259 "All undefs should have the same type");
260 // Break the undef vector in as many scalar elements as needed
261 // for the flattening.
262 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
263 EltIdx != EltEnd; ++EltIdx)
264 Ops.push_back(Undef->getOperand(0).getReg());
265 break;
266 }
267 default:
268 return false;
269 }
270 }
271
272 // Check if the combine is illegal
273 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
274 if (!isLegalOrBeforeLegalizer(
275 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
276 return false;
277 }
278
279 if (IsUndef)
280 Ops.clear();
281
282 return true;
283}
286 // We determined that the concat_vectors can be flattened.
287 // Generate the flattened build_vector.
288 Register DstReg = MI.getOperand(0).getReg();
289 Builder.setInsertPt(*MI.getParent(), MI);
290 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
291
292 // Note: IsUndef is sort of redundant. We could have determined it by
293 // checking that all Ops are undef. Alternatively, we could have
294 // generated a build_vector of undefs and relied on another combine to
295 // clean that up. For now, given we already gather this information
296 // in matchCombineConcatVectors, just save compile time and issue the
297 // right thing.
298 if (Ops.empty())
299 Builder.buildUndef(NewDstReg);
300 else
301 Builder.buildBuildVector(NewDstReg, Ops);
302 MI.eraseFromParent();
303 replaceRegWith(MRI, DstReg, NewDstReg);
304}
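// Illustrative example (hand-written, not from a specific test) of the
// match/apply pair above:
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32)
//   %b:_(<2 x s32>) = G_IMPLICIT_DEF
//   %cat:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
// is flattened to:
//   %undef:_(s32) = G_IMPLICIT_DEF
//   %cat:_(<4 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32), %undef(s32), %undef(s32)
// with the undef scalar built once and reused for every undef element.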
305
308 if (matchCombineShuffleVector(MI, Ops)) {
310 return true;
311 }
312 return false;
313}
314
317 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
318 "Invalid instruction kind");
319 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
320 Register Src1 = MI.getOperand(1).getReg();
321 LLT SrcType = MRI.getType(Src1);
322 // As bizarre as it may look, a shuffle vector can actually produce a
323 // scalar! This is because at the IR level a <1 x ty> shuffle
324 // vector is perfectly valid.
325 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
326 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
327
328 // If the resulting vector is smaller than the size of the source
329 // vectors being concatenated, we won't be able to replace the
330 // shuffle vector with a concat_vectors.
331 //
332 // Note: We may still be able to produce a concat_vectors fed by
333 // extract_vector_elt and so on. It is less clear that would
334 // be better though, so don't bother for now.
335 //
336 // If the destination is a scalar, the size of the sources doesn't
337 // matter. We will lower the shuffle to a plain copy. This will
338 // work only if the source and destination have the same size. But
339 // that's covered by the next condition.
340 //
341 // TODO: If the sizes of the source and destination don't match
342 // we could still emit an extract vector element in that case.
343 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
344 return false;
345
346 // Check that the shuffle mask can be broken evenly between the
347 // different sources.
348 if (DstNumElts % SrcNumElts != 0)
349 return false;
350
351 // Mask length is a multiple of the source vector length.
352 // Check if the shuffle is some kind of concatenation of the input
353 // vectors.
354 unsigned NumConcat = DstNumElts / SrcNumElts;
355 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
356 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
357 for (unsigned i = 0; i != DstNumElts; ++i) {
358 int Idx = Mask[i];
359 // Undef value.
360 if (Idx < 0)
361 continue;
362 // Ensure the indices in each SrcType sized piece are sequential and that
363 // the same source is used for the whole piece.
364 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
365 (ConcatSrcs[i / SrcNumElts] >= 0 &&
366 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
367 return false;
368 // Remember which source this index came from.
369 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
370 }
371
372 // The shuffle is concatenating multiple vectors together.
373 // Collect the different operands for that.
374 Register UndefReg;
375 Register Src2 = MI.getOperand(2).getReg();
376 for (auto Src : ConcatSrcs) {
377 if (Src < 0) {
378 if (!UndefReg) {
379 Builder.setInsertPt(*MI.getParent(), MI);
380 UndefReg = Builder.buildUndef(SrcType).getReg(0);
381 }
382 Ops.push_back(UndefReg);
383 } else if (Src == 0)
384 Ops.push_back(Src1);
385 else
386 Ops.push_back(Src2);
387 }
388 return true;
389}
390
392 const ArrayRef<Register> Ops) {
393 Register DstReg = MI.getOperand(0).getReg();
394 Builder.setInsertPt(*MI.getParent(), MI);
395 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
396
397 if (Ops.size() == 1)
398 Builder.buildCopy(NewDstReg, Ops[0]);
399 else
400 Builder.buildMergeLikeInstr(NewDstReg, Ops);
401
402 MI.eraseFromParent();
403 replaceRegWith(MRI, DstReg, NewDstReg);
404}
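// Illustrative example (sketch): a shuffle whose mask simply concatenates the
// two sources,
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %v1(<2 x s32>), %v2(<2 x s32>),
//                         shufflemask(0, 1, 2, 3)
// is rewritten by the pair above into:
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %v1(<2 x s32>), %v2(<2 x s32>)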
405
407 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
408 "Invalid instruction kind");
409
410 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
411 return Mask.size() == 1;
412}
413
415 Register DstReg = MI.getOperand(0).getReg();
416 Builder.setInsertPt(*MI.getParent(), MI);
417
418 int I = MI.getOperand(3).getShuffleMask()[0];
419 Register Src1 = MI.getOperand(1).getReg();
420 LLT Src1Ty = MRI.getType(Src1);
421 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
422 Register SrcReg;
423 if (I >= Src1NumElts) {
424 SrcReg = MI.getOperand(2).getReg();
425 I -= Src1NumElts;
426 } else if (I >= 0)
427 SrcReg = Src1;
428
429 if (I < 0)
430 Builder.buildUndef(DstReg);
431 else if (!MRI.getType(SrcReg).isVector())
432 Builder.buildCopy(DstReg, SrcReg);
433 else
435
436 MI.eraseFromParent();
437}
438
439namespace {
440
441/// Select a preference between two uses. CurrentUse is the current preference
442 /// while *ForCandidate are the attributes of the candidate under consideration.
443PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
444 PreferredTuple &CurrentUse,
445 const LLT TyForCandidate,
446 unsigned OpcodeForCandidate,
447 MachineInstr *MIForCandidate) {
448 if (!CurrentUse.Ty.isValid()) {
449 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
450 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
451 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
452 return CurrentUse;
453 }
454
455 // We permit the extend to hoist through basic blocks but this is only
456 // sensible if the target has extending loads. If you end up lowering back
457 // into a load and extend during the legalizer then the end result is
458 // hoisting the extend up to the load.
459
460 // Prefer defined extensions to undefined extensions as these are more
461 // likely to reduce the number of instructions.
462 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
463 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
464 return CurrentUse;
465 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
466 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
467 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
468
469 // Prefer sign extensions to zero extensions as sign-extensions tend to be
470 // more expensive. Don't do this if the load is already a zero-extend load
471 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
472 // later.
473 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
474 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
475 OpcodeForCandidate == TargetOpcode::G_ZEXT)
476 return CurrentUse;
477 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
478 OpcodeForCandidate == TargetOpcode::G_SEXT)
479 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
480 }
481
482 // This is potentially target specific. We've chosen the largest type
483 // because G_TRUNC is usually free. One potential catch with this is that
484 // some targets have a reduced number of larger registers than smaller
485 // registers and this choice potentially increases the live-range for the
486 // larger value.
487 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
488 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
489 }
490 return CurrentUse;
491}
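// For example (an illustrative scenario): if a G_LOAD has one user that is a
// G_ANYEXT to s64 and another that is a G_ZEXT to s32, the G_ZEXT is chosen
// even though it is narrower, because defined extensions are preferred over
// G_ANYEXT. If instead both users are a G_ZEXT and a G_SEXT to the same s32
// type (and the load is not already a zero-extending load), the G_SEXT wins.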
492
493/// Find a suitable place to insert some instructions and insert them. This
494/// function accounts for special cases like inserting before a PHI node.
495 /// The current strategy for inserting before PHIs is to duplicate the
496/// instructions for each predecessor. However, while that's ok for G_TRUNC
497/// on most targets since it generally requires no code, other targets/cases may
498/// want to try harder to find a dominating block.
499static void InsertInsnsWithoutSideEffectsBeforeUse(
500 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
501 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
502 MachineOperand &UseMO)>
503 Inserter) {
504 MachineInstr &UseMI = *UseMO.getParent();
505
506 MachineBasicBlock *InsertBB = UseMI.getParent();
507
508 // If the use is a PHI then we want the predecessor block instead.
509 if (UseMI.isPHI()) {
510 MachineOperand *PredBB = std::next(&UseMO);
511 InsertBB = PredBB->getMBB();
512 }
513
514 // If the block is the same block as the def then we want to insert just after
515 // the def instead of at the start of the block.
516 if (InsertBB == DefMI.getParent()) {
517 MachineBasicBlock::iterator InsertPt(&DefMI);
518 Inserter(InsertBB, std::next(InsertPt), UseMO);
519 return;
520 }
521
522 // Otherwise we want the start of the BB
523 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
524}
525} // end anonymous namespace
526
528 PreferredTuple Preferred;
529 if (matchCombineExtendingLoads(MI, Preferred)) {
530 applyCombineExtendingLoads(MI, Preferred);
531 return true;
532 }
533 return false;
534}
535
536static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
537 unsigned CandidateLoadOpc;
538 switch (ExtOpc) {
539 case TargetOpcode::G_ANYEXT:
540 CandidateLoadOpc = TargetOpcode::G_LOAD;
541 break;
542 case TargetOpcode::G_SEXT:
543 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
544 break;
545 case TargetOpcode::G_ZEXT:
546 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
547 break;
548 default:
549 llvm_unreachable("Unexpected extend opc");
550 }
551 return CandidateLoadOpc;
552}
553
555 PreferredTuple &Preferred) {
556 // We match the loads and follow the uses to the extend instead of matching
557 // the extends and following the def to the load. This is because the load
558 // must remain in the same position for correctness (unless we also add code
559 // to find a safe place to sink it) whereas the extend is freely movable.
560 // It also prevents us from duplicating the load for the volatile case or just
561 // for performance.
562 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
563 if (!LoadMI)
564 return false;
565
566 Register LoadReg = LoadMI->getDstReg();
567
568 LLT LoadValueTy = MRI.getType(LoadReg);
569 if (!LoadValueTy.isScalar())
570 return false;
571
572 // Most architectures are going to legalize <s8 loads into at least a 1 byte
573 // load, and the MMOs can only describe memory accesses in multiples of bytes.
574 // If we try to perform extload combining on those, we can end up with
575 // %a(s8) = extload %ptr (load 1 byte from %ptr)
576 // ... which is an illegal extload instruction.
577 if (LoadValueTy.getSizeInBits() < 8)
578 return false;
579
580 // For non power-of-2 types, they will very likely be legalized into multiple
581 // loads. Don't bother trying to match them into extending loads.
582 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
583 return false;
584
585 // Find the preferred type aside from the any-extends (unless it's the only
586 // one) and non-extending ops. We'll emit an extending load to that type
587 // and emit a variant of (extend (trunc X)) for the others according to the
588 // relative type sizes. At the same time, pick an extend to use based on the
589 // extend involved in the chosen type.
590 unsigned PreferredOpcode =
591 isa<GLoad>(&MI)
592 ? TargetOpcode::G_ANYEXT
593 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
594 Preferred = {LLT(), PreferredOpcode, nullptr};
595 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
596 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
597 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
598 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
599 const auto &MMO = LoadMI->getMMO();
600 // Don't do anything for atomics.
601 if (MMO.isAtomic())
602 continue;
603 // Check for legality.
604 if (!isPreLegalize()) {
605 LegalityQuery::MemDesc MMDesc(MMO);
606 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
607 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
608 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
609 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
610 .Action != LegalizeActions::Legal)
611 continue;
612 }
613 Preferred = ChoosePreferredUse(MI, Preferred,
614 MRI.getType(UseMI.getOperand(0).getReg()),
615 UseMI.getOpcode(), &UseMI);
616 }
617 }
618
619 // There were no extends
620 if (!Preferred.MI)
621 return false;
622 // It should be impossible to choose an extend without selecting a different
623 // type since by definition the result of an extend is larger.
624 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
625
626 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
627 return true;
628}
629
631 PreferredTuple &Preferred) {
632 // Rewrite the load to the chosen extending load.
633 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
634
635 // Inserter to insert a truncate back to the original type at a given point
636 // with some basic CSE to limit truncate duplication to one per BB.
638 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
639 MachineBasicBlock::iterator InsertBefore,
640 MachineOperand &UseMO) {
641 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
642 if (PreviouslyEmitted) {
644 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
646 return;
647 }
648
649 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
650 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
651 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
652 EmittedInsns[InsertIntoBB] = NewMI;
653 replaceRegOpWith(MRI, UseMO, NewDstReg);
654 };
655
657 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
658 MI.setDesc(Builder.getTII().get(LoadOpc));
659
660 // Rewrite all the uses to fix up the types.
661 auto &LoadValue = MI.getOperand(0);
663 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
664 Uses.push_back(&UseMO);
665
666 for (auto *UseMO : Uses) {
667 MachineInstr *UseMI = UseMO->getParent();
668
669 // If the extend is compatible with the preferred extend then we should fix
670 // up the type and extend so that it uses the preferred use.
671 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
672 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
673 Register UseDstReg = UseMI->getOperand(0).getReg();
674 MachineOperand &UseSrcMO = UseMI->getOperand(1);
675 const LLT UseDstTy = MRI.getType(UseDstReg);
676 if (UseDstReg != ChosenDstReg) {
677 if (Preferred.Ty == UseDstTy) {
678 // If the use has the same type as the preferred use, then merge
679 // the vregs and erase the extend. For example:
680 // %1:_(s8) = G_LOAD ...
681 // %2:_(s32) = G_SEXT %1(s8)
682 // %3:_(s32) = G_ANYEXT %1(s8)
683 // ... = ... %3(s32)
684 // rewrites to:
685 // %2:_(s32) = G_SEXTLOAD ...
686 // ... = ... %2(s32)
687 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
689 UseMO->getParent()->eraseFromParent();
690 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
691 // If the preferred size is smaller, then keep the extend but extend
692 // from the result of the extending load. For example:
693 // %1:_(s8) = G_LOAD ...
694 // %2:_(s32) = G_SEXT %1(s8)
695 // %3:_(s64) = G_ANYEXT %1(s8)
696 // ... = ... %3(s64)
697 // rewrites to:
698 // %2:_(s32) = G_SEXTLOAD ...
699 // %3:_(s64) = G_ANYEXT %2:_(s32)
700 // ... = ... %3(s64)
701 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
702 } else {
703 // If the preferred size is large, then insert a truncate. For
704 // example:
705 // %1:_(s8) = G_LOAD ...
706 // %2:_(s64) = G_SEXT %1(s8)
707 // %3:_(s32) = G_ZEXT %1(s8)
708 // ... = ... %3(s32)
709 // rewrites to:
710 // %2:_(s64) = G_SEXTLOAD ...
711 // %4:_(s8) = G_TRUNC %2:_(s64)
712 // %3:_(s32) = G_ZEXT %4:_(s8)
713 // ... = ... %3(s32)
714 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
715 InsertTruncAt);
716 }
717 continue;
718 }
719 // The use is (one of) the uses of the preferred use we chose earlier.
720 // We're going to update the load to def this value later so just erase
721 // the old extend.
723 UseMO->getParent()->eraseFromParent();
724 continue;
725 }
726
727 // The use isn't an extend. Truncate back to the type we originally loaded.
728 // This is free on many targets.
729 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
730 }
731
732 MI.getOperand(0).setReg(ChosenDstReg);
734}
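// Putting the pieces together, an illustrative end-to-end rewrite (sketch):
//   %v:_(s8) = G_LOAD %p(p0) :: (load (s8))
//   %e:_(s32) = G_SEXT %v(s8)
// becomes:
//   %e:_(s32) = G_SEXTLOAD %p(p0) :: (load (s8))
// and any remaining non-extend users of %v are rewritten to use
//   %t:_(s8) = G_TRUNC %e(s32)
// with at most one such truncate emitted per basic block.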
735
737 BuildFnTy &MatchInfo) {
738 assert(MI.getOpcode() == TargetOpcode::G_AND);
739
740 // If we have the following code:
741 // %mask = G_CONSTANT 255
742 // %ld = G_LOAD %ptr, (load s16)
743 // %and = G_AND %ld, %mask
744 //
745 // Try to fold it into
746 // %ld = G_ZEXTLOAD %ptr, (load s8)
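// For instance (illustrative numbers): a mask of 255 gives
// MaskSizeBits == countr_one(0xff) == 8, a byte-sized power of two that is
// no wider than the 16-bit memory access, so (subject to the legality checks
// below) the load can be narrowed to a zero-extending s8 load as shown.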
747
748 Register Dst = MI.getOperand(0).getReg();
749 if (MRI.getType(Dst).isVector())
750 return false;
751
752 auto MaybeMask =
753 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
754 if (!MaybeMask)
755 return false;
756
757 APInt MaskVal = MaybeMask->Value;
758
759 if (!MaskVal.isMask())
760 return false;
761
762 Register SrcReg = MI.getOperand(1).getReg();
763 // Don't use getOpcodeDef() here since intermediate instructions may have
764 // multiple users.
765 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
766 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
767 return false;
768
769 Register LoadReg = LoadMI->getDstReg();
770 LLT RegTy = MRI.getType(LoadReg);
771 Register PtrReg = LoadMI->getPointerReg();
772 unsigned RegSize = RegTy.getSizeInBits();
773 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
774 unsigned MaskSizeBits = MaskVal.countr_one();
775
776 // The mask may not be larger than the in-memory type, as it might cover
777 // sign-extended bits.
778 if (MaskSizeBits > LoadSizeBits.getValue())
779 return false;
780
781 // If the mask covers the whole destination register, there's nothing to
782 // extend
783 if (MaskSizeBits >= RegSize)
784 return false;
785
786 // Most targets cannot deal with loads narrower than 8 bits and need to
787 // re-legalize to at least byte-sized loads. Avoid creating such loads here.
788 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
789 return false;
790
791 const MachineMemOperand &MMO = LoadMI->getMMO();
792 LegalityQuery::MemDesc MemDesc(MMO);
793
794 // Don't modify the memory access size if this is atomic/volatile, but we can
795 // still adjust the opcode to indicate the high bit behavior.
796 if (LoadMI->isSimple())
797 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
798 else if (LoadSizeBits.getValue() > MaskSizeBits ||
799 LoadSizeBits.getValue() == RegSize)
800 return false;
801
802 // TODO: Could check if it's legal with the reduced or original memory size.
803 if (!isLegalOrBeforeLegalizer(
804 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
805 return false;
806
807 MatchInfo = [=](MachineIRBuilder &B) {
808 B.setInstrAndDebugLoc(*LoadMI);
809 auto &MF = B.getMF();
810 auto PtrInfo = MMO.getPointerInfo();
811 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
812 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
813 LoadMI->eraseFromParent();
814 };
815 return true;
816}
817
819 const MachineInstr &UseMI) {
820 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
821 "shouldn't consider debug uses");
822 assert(DefMI.getParent() == UseMI.getParent());
823 if (&DefMI == &UseMI)
824 return true;
825 const MachineBasicBlock &MBB = *DefMI.getParent();
826 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
827 return &MI == &DefMI || &MI == &UseMI;
828 });
829 if (DefOrUse == MBB.end())
830 llvm_unreachable("Block must contain both DefMI and UseMI!");
831 return &*DefOrUse == &DefMI;
832}
833
835 const MachineInstr &UseMI) {
836 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
837 "shouldn't consider debug uses");
838 if (MDT)
839 return MDT->dominates(&DefMI, &UseMI);
840 else if (DefMI.getParent() != UseMI.getParent())
841 return false;
842
843 return isPredecessor(DefMI, UseMI);
844}
845
847 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
848 Register SrcReg = MI.getOperand(1).getReg();
849 Register LoadUser = SrcReg;
850
851 if (MRI.getType(SrcReg).isVector())
852 return false;
853
854 Register TruncSrc;
855 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
856 LoadUser = TruncSrc;
857
858 uint64_t SizeInBits = MI.getOperand(2).getImm();
859 // If the source is a G_SEXTLOAD from the same bit width, then we don't
860 // need any extend at all, just a truncate.
861 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
862 // If truncating more than the original extended value, abort.
863 auto LoadSizeBits = LoadMI->getMemSizeInBits();
864 if (TruncSrc &&
865 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
866 return false;
867 if (LoadSizeBits == SizeInBits)
868 return true;
869 }
870 return false;
871}
872
874 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
875 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
876 MI.eraseFromParent();
877}
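// Illustrative sketch of the pattern handled by the match/apply pair above:
//   %x:_(s32) = G_SEXTLOAD %p(p0) :: (load (s8))
//   %y:_(s32) = G_SEXT_INREG %x, 8
// The G_SEXT_INREG is redundant because the load already sign-extended from
// 8 bits, so %y is replaced with a plain copy of %x.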
878
880 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
881 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
882
883 Register DstReg = MI.getOperand(0).getReg();
884 LLT RegTy = MRI.getType(DstReg);
885
886 // Only supports scalars for now.
887 if (RegTy.isVector())
888 return false;
889
890 Register SrcReg = MI.getOperand(1).getReg();
891 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
892 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
893 return false;
894
895 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
896
897 // If the sign extend extends from a narrower width than the load's width,
898 // then we can narrow the load width when we combine to a G_SEXTLOAD.
899 // Avoid widening the load at all.
900 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
901
902 // Don't generate G_SEXTLOADs with a < 1 byte width.
903 if (NewSizeBits < 8)
904 return false;
905 // Don't bother creating a non-power-of-2 sextload; it will likely be broken up
906 // anyway for most targets.
907 if (!isPowerOf2_32(NewSizeBits))
908 return false;
909
910 const MachineMemOperand &MMO = LoadDef->getMMO();
911 LegalityQuery::MemDesc MMDesc(MMO);
912
913 // Don't modify the memory access size if this is atomic/volatile, but we can
914 // still adjust the opcode to indicate the high bit behavior.
915 if (LoadDef->isSimple())
916 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
917 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
918 return false;
919
920 // TODO: Could check if it's legal with the reduced or original memory size.
921 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
922 {MRI.getType(LoadDef->getDstReg()),
923 MRI.getType(LoadDef->getPointerReg())},
924 {MMDesc}}))
925 return false;
926
927 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
928 return true;
929}
930
932 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
933 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
934 Register LoadReg;
935 unsigned ScalarSizeBits;
936 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
937 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
938
939 // If we have the following:
940 // %ld = G_LOAD %ptr, (load 2)
941 // %ext = G_SEXT_INREG %ld, 8
942 // ==>
943 // %ld = G_SEXTLOAD %ptr (load 1)
944
945 auto &MMO = LoadDef->getMMO();
947 auto &MF = Builder.getMF();
948 auto PtrInfo = MMO.getPointerInfo();
949 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
950 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
951 LoadDef->getPointerReg(), *NewMMO);
952 MI.eraseFromParent();
953}
954
956 if (Ty.isVector())
958 Ty.getNumElements());
959 return IntegerType::get(C, Ty.getSizeInBits());
960}
961
962/// Return true if 'MI' is a load or a store that may fold its address
963/// operand into the load/store addressing mode.
964static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
965 MachineRegisterInfo &MRI) {
966 TargetLowering::AddrMode AM;
967 auto *MF = MI->getMF();
968 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
969 if (!Addr)
970 return false;
971
972 AM.HasBaseReg = true;
973 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
974 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
975 else
976 AM.Scale = 1; // [reg +/- reg]
977
978 return TLI.isLegalAddressingMode(
979 MF->getDataLayout(), AM,
980 getTypeForLLT(MI->getMMO().getMemoryType(),
981 MF->getFunction().getContext()),
982 MI->getMMO().getAddrSpace());
983}
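// For example (hypothetical target): if [reg + imm] addressing is legal for
// 32-bit scalar loads, then a G_LOAD whose pointer is defined by
// G_PTR_ADD %base, 16 reports true here (BaseOffs = 16), whereas a variable
// offset is modelled as [reg + reg] via Scale = 1 and may or may not be
// foldable depending on the target.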
984
985static unsigned getIndexedOpc(unsigned LdStOpc) {
986 switch (LdStOpc) {
987 case TargetOpcode::G_LOAD:
988 return TargetOpcode::G_INDEXED_LOAD;
989 case TargetOpcode::G_STORE:
990 return TargetOpcode::G_INDEXED_STORE;
991 case TargetOpcode::G_ZEXTLOAD:
992 return TargetOpcode::G_INDEXED_ZEXTLOAD;
993 case TargetOpcode::G_SEXTLOAD:
994 return TargetOpcode::G_INDEXED_SEXTLOAD;
995 default:
996 llvm_unreachable("Unexpected opcode");
997 }
998}
999
1000bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1001 // Check for legality.
1002 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1003 LLT Ty = MRI.getType(LdSt.getReg(0));
1004 LLT MemTy = LdSt.getMMO().getMemoryType();
1006 {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
1007 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1008 SmallVector<LLT> OpTys;
1009 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1010 OpTys = {PtrTy, Ty, Ty};
1011 else
1012 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1013
1014 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1015 return isLegal(Q);
1016}
1017
1019 "post-index-use-threshold", cl::Hidden, cl::init(32),
1020 cl::desc("Number of uses of a base pointer to check before it is no longer "
1021 "considered for post-indexing."));
1022
1023bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1024 Register &Base, Register &Offset,
1025 bool &RematOffset) {
1026 // We're looking for the following pattern, for either load or store:
1027 // %baseptr:_(p0) = ...
1028 // G_STORE %val(s64), %baseptr(p0)
1029 // %offset:_(s64) = G_CONSTANT i64 -256
1030 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1031 const auto &TLI = getTargetLowering();
1032
1033 Register Ptr = LdSt.getPointerReg();
1034 // If the store is the only use, don't bother.
1035 if (MRI.hasOneNonDBGUse(Ptr))
1036 return false;
1037
1038 if (!isIndexedLoadStoreLegal(LdSt))
1039 return false;
1040
1041 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1042 return false;
1043
1044 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1045 auto *PtrDef = MRI.getVRegDef(Ptr);
1046
1047 unsigned NumUsesChecked = 0;
1048 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1049 if (++NumUsesChecked > PostIndexUseThreshold)
1050 return false; // Try to avoid exploding compile time.
1051
1052 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1053 // The use itself might be dead. This can happen during combines if DCE
1054 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1055 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1056 continue;
1057
1058 // Check that the user of this isn't the store; otherwise we'd generate an
1059 // indexed store defining its own use.
1060 if (StoredValDef == &Use)
1061 continue;
1062
1063 Offset = PtrAdd->getOffsetReg();
1064 if (!ForceLegalIndexing &&
1065 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1066 /*IsPre*/ false, MRI))
1067 continue;
1068
1069 // Make sure the offset calculation is before the potentially indexed op.
1070 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1071 RematOffset = false;
1072 if (!dominates(*OffsetDef, LdSt)) {
1073 // If the offset however is just a G_CONSTANT, we can always just
1074 // rematerialize it where we need it.
1075 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1076 continue;
1077 RematOffset = true;
1078 }
1079
1080 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1081 if (&BasePtrUse == PtrDef)
1082 continue;
1083
1084 // If the user is a later load/store that can be post-indexed, then don't
1085 // combine this one.
1086 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1087 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1088 dominates(LdSt, *BasePtrLdSt) &&
1089 isIndexedLoadStoreLegal(*BasePtrLdSt))
1090 return false;
1091
1092 // Now we're looking for the key G_PTR_ADD instruction, which contains
1093 // the offset add that we want to fold.
1094 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1095 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1096 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1097 // If the use is in a different block, then we may produce worse code
1098 // due to the extra register pressure.
1099 if (BaseUseUse.getParent() != LdSt.getParent())
1100 return false;
1101
1102 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1103 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1104 return false;
1105 }
1106 if (!dominates(LdSt, BasePtrUse))
1107 return false; // All uses must be dominated by the load/store.
1108 }
1109 }
1110
1111 Addr = PtrAdd->getReg(0);
1112 Base = PtrAdd->getBaseReg();
1113 return true;
1114 }
1115
1116 return false;
1117}
1118
1119bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1120 Register &Base, Register &Offset) {
1121 auto &MF = *LdSt.getParent()->getParent();
1122 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1123
1124 Addr = LdSt.getPointerReg();
1127 return false;
1128
1129 if (!ForceLegalIndexing &&
1130 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1131 return false;
1132
1133 if (!isIndexedLoadStoreLegal(LdSt))
1134 return false;
1135
1137 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1138 return false;
1139
1140 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1141 // Would require a copy.
1142 if (Base == St->getValueReg())
1143 return false;
1144
1145 // We're expecting one use of Addr in MI, but it could also be the
1146 // value stored, which isn't actually dominated by the instruction.
1147 if (St->getValueReg() == Addr)
1148 return false;
1149 }
1150
1151 // Avoid increasing cross-block register pressure.
1152 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1153 if (AddrUse.getParent() != LdSt.getParent())
1154 return false;
1155
1156 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1157 // That might allow us to end base's liveness here by adjusting the constant.
1158 bool RealUse = false;
1159 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1160 if (!dominates(LdSt, AddrUse))
1161 return false; // All uses must be dominated by the load/store.
1162
1163 // If Ptr may be folded into the addressing mode of another use, then it's
1164 // not profitable to do this transformation.
1165 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1166 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1167 RealUse = true;
1168 } else {
1169 RealUse = true;
1170 }
1171 }
1172 return RealUse;
1173}
1174
1176 BuildFnTy &MatchInfo) {
1177 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1178
1179 // Check if there is a load that defines the vector being extracted from.
1180 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1181 if (!LoadMI)
1182 return false;
1183
1184 Register Vector = MI.getOperand(1).getReg();
1185 LLT VecEltTy = MRI.getType(Vector).getElementType();
1186
1187 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1188
1189 // Checking whether we should reduce the load width.
1191 return false;
1192
1193 // Check if the defining load is simple.
1194 if (!LoadMI->isSimple())
1195 return false;
1196
1197 // If the vector element type is not a multiple of a byte then we are unable
1198 // to correctly compute an address to load only the extracted element as a
1199 // scalar.
1200 if (!VecEltTy.isByteSized())
1201 return false;
1202
1203 // Check for load fold barriers between the extraction and the load.
1204 if (MI.getParent() != LoadMI->getParent())
1205 return false;
1206 const unsigned MaxIter = 20;
1207 unsigned Iter = 0;
1208 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1209 if (II->isLoadFoldBarrier())
1210 return false;
1211 if (Iter++ == MaxIter)
1212 return false;
1213 }
1214
1215 // Check if the new load that we are going to create is legal
1216 // if we are in the post-legalization phase.
1217 MachineMemOperand MMO = LoadMI->getMMO();
1218 Align Alignment = MMO.getAlign();
1219 MachinePointerInfo PtrInfo;
1221
1222 // Finding the appropriate PtrInfo if offset is a known constant.
1223 // This is required to create the memory operand for the narrowed load.
1224 // This machine memory operand object helps us infer about legality
1225 // before we proceed to combine the instruction.
1226 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1227 int Elt = CVal->getZExtValue();
1228 // FIXME: should be (ABI size)*Elt.
1229 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1230 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1231 } else {
1232 // Discard the pointer info except the address space because the memory
1233 // operand can't represent this new access since the offset is variable.
1234 Offset = VecEltTy.getSizeInBits() / 8;
1236 }
1237
1238 Alignment = commonAlignment(Alignment, Offset);
1239
1240 Register VecPtr = LoadMI->getPointerReg();
1241 LLT PtrTy = MRI.getType(VecPtr);
1242
1243 MachineFunction &MF = *MI.getMF();
1244 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1245
1246 LegalityQuery::MemDesc MMDesc(*NewMMO);
1247
1248 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1249
1250 if (!isLegalOrBeforeLegalizer(Q))
1251 return false;
1252
1253 // Load must be allowed and fast on the target.
1254 LLVMContext &C = MF.getFunction().getContext();
1255 auto &DL = MF.getDataLayout();
1256 unsigned Fast = 0;
1257 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1258 &Fast) ||
1259 !Fast)
1260 return false;
1261
1262 Register Result = MI.getOperand(0).getReg();
1263 Register Index = MI.getOperand(2).getReg();
1264
1265 MatchInfo = [=](MachineIRBuilder &B) {
1266 GISelObserverWrapper DummyObserver;
1267 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1268 // Get pointer to the vector element.
1269 Register finalPtr = Helper.getVectorElementPointer(
1270 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1271 Index);
1272 // New G_LOAD instruction.
1273 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1274 // Remove the original G_LOAD instruction.
1275 LoadMI->eraseFromParent();
1276 };
1277
1278 return true;
1279}
1280
1283 auto &LdSt = cast<GLoadStore>(MI);
1284
1285 if (LdSt.isAtomic())
1286 return false;
1287
1288 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1289 MatchInfo.Offset);
1290 if (!MatchInfo.IsPre &&
1291 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1292 MatchInfo.Offset, MatchInfo.RematOffset))
1293 return false;
1294
1295 return true;
1296}
1297
1300 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1301 unsigned Opcode = MI.getOpcode();
1302 bool IsStore = Opcode == TargetOpcode::G_STORE;
1303 unsigned NewOpcode = getIndexedOpc(Opcode);
1304
1305 // If the offset constant didn't happen to dominate the load/store, we can
1306 // just clone it as needed.
1307 if (MatchInfo.RematOffset) {
1308 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1309 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1310 *OldCst->getOperand(1).getCImm());
1311 MatchInfo.Offset = NewCst.getReg(0);
1312 }
1313
1314 auto MIB = Builder.buildInstr(NewOpcode);
1315 if (IsStore) {
1316 MIB.addDef(MatchInfo.Addr);
1317 MIB.addUse(MI.getOperand(0).getReg());
1318 } else {
1319 MIB.addDef(MI.getOperand(0).getReg());
1320 MIB.addDef(MatchInfo.Addr);
1321 }
1322
1323 MIB.addUse(MatchInfo.Base);
1324 MIB.addUse(MatchInfo.Offset);
1325 MIB.addImm(MatchInfo.IsPre);
1326 MIB->cloneMemRefs(*MI.getMF(), MI);
1327 MI.eraseFromParent();
1328 AddrDef.eraseFromParent();
1329
1330 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1331}
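// Illustrative post-indexed rewrite (sketch; actual legality is target
// dependent):
//   G_STORE %val(s64), %baseptr(p0)
//   %offset:_(s64) = G_CONSTANT i64 -256
//   %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
// becomes:
//   %new_addr:_(p0) = G_INDEXED_STORE %val(s64), %baseptr(p0), %offset(s64), 0
// where the trailing 0 is the IsPre flag (0 = post-indexed).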
1332
1334 MachineInstr *&OtherMI) {
1335 unsigned Opcode = MI.getOpcode();
1336 bool IsDiv, IsSigned;
1337
1338 switch (Opcode) {
1339 default:
1340 llvm_unreachable("Unexpected opcode!");
1341 case TargetOpcode::G_SDIV:
1342 case TargetOpcode::G_UDIV: {
1343 IsDiv = true;
1344 IsSigned = Opcode == TargetOpcode::G_SDIV;
1345 break;
1346 }
1347 case TargetOpcode::G_SREM:
1348 case TargetOpcode::G_UREM: {
1349 IsDiv = false;
1350 IsSigned = Opcode == TargetOpcode::G_SREM;
1351 break;
1352 }
1353 }
1354
1355 Register Src1 = MI.getOperand(1).getReg();
1356 unsigned DivOpcode, RemOpcode, DivremOpcode;
1357 if (IsSigned) {
1358 DivOpcode = TargetOpcode::G_SDIV;
1359 RemOpcode = TargetOpcode::G_SREM;
1360 DivremOpcode = TargetOpcode::G_SDIVREM;
1361 } else {
1362 DivOpcode = TargetOpcode::G_UDIV;
1363 RemOpcode = TargetOpcode::G_UREM;
1364 DivremOpcode = TargetOpcode::G_UDIVREM;
1365 }
1366
1367 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1368 return false;
1369
1370 // Combine:
1371 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1372 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1373 // into:
1374 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1375
1376 // Combine:
1377 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1378 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1379 // into:
1380 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1381
1382 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1383 if (MI.getParent() == UseMI.getParent() &&
1384 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1385 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1386 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1387 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1388 OtherMI = &UseMI;
1389 return true;
1390 }
1391 }
1392
1393 return false;
1394}
1395
1397 MachineInstr *&OtherMI) {
1398 unsigned Opcode = MI.getOpcode();
1399 assert(OtherMI && "OtherMI shouldn't be empty.");
1400
1401 Register DestDivReg, DestRemReg;
1402 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1403 DestDivReg = MI.getOperand(0).getReg();
1404 DestRemReg = OtherMI->getOperand(0).getReg();
1405 } else {
1406 DestDivReg = OtherMI->getOperand(0).getReg();
1407 DestRemReg = MI.getOperand(0).getReg();
1408 }
1409
1410 bool IsSigned =
1411 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1412
1413 // Check which instruction is first in the block so we don't break def-use
1414 // deps by "moving" the instruction incorrectly. Also keep track of which
1415 // instruction is first so we pick its operands, avoiding use-before-def
1416 // bugs.
1417 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1418 Builder.setInstrAndDebugLoc(*FirstInst);
1419
1420 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1421 : TargetOpcode::G_UDIVREM,
1422 {DestDivReg, DestRemReg},
1423 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1424 MI.eraseFromParent();
1425 OtherMI->eraseFromParent();
1426}
1427
1429 MachineInstr *&BrCond) {
1430 assert(MI.getOpcode() == TargetOpcode::G_BR);
1431
1432 // Try to match the following:
1433 // bb1:
1434 // G_BRCOND %c1, %bb2
1435 // G_BR %bb3
1436 // bb2:
1437 // ...
1438 // bb3:
1439
1440 // The above pattern does not have a fall-through to the successor bb2, always
1441 // resulting in a branch no matter which path is taken. Here we try to find
1442 // and replace that pattern with a conditional branch to bb3 and otherwise a
1443 // fall-through to bb2. This is generally better for branch predictors.
1444
1445 MachineBasicBlock *MBB = MI.getParent();
1446 MachineBasicBlock::iterator BrIt(MI);
1447 if (BrIt == MBB->begin())
1448 return false;
1449 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1450
1451 BrCond = &*std::prev(BrIt);
1452 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1453 return false;
1454
1455 // Check that the next block is the conditional branch target. Also make sure
1456 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1457 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1458 return BrCondTarget != MI.getOperand(0).getMBB() &&
1459 MBB->isLayoutSuccessor(BrCondTarget);
1460}
1461
1463 MachineInstr *&BrCond) {
1464 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1466 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1467 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1468 // this to i1 only since we might not know for sure what kind of
1469 // compare generated the condition value.
1470 auto True = Builder.buildConstant(
1471 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1472 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1473
1474 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1475 Observer.changingInstr(MI);
1476 MI.getOperand(0).setMBB(FallthroughBB);
1477 Observer.changedInstr(MI);
1478
1479 // Change the conditional branch to use the inverted condition and
1480 // new target block.
1481 Observer.changingInstr(*BrCond);
1482 BrCond->getOperand(0).setReg(Xor.getReg(0));
1483 BrCond->getOperand(1).setMBB(BrTarget);
1484 Observer.changedInstr(*BrCond);
1485}
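// Illustrative result (sketch; the exact "true" constant comes from
// getICmpTrueVal for the target):
//   bb1:
//     %true:_(s1) = G_CONSTANT i1 1
//     %inv:_(s1) = G_XOR %c1, %true
//     G_BRCOND %inv(s1), %bb3
//     G_BR %bb2
// The unconditional branch now targets the layout successor bb2 and can be
// cleaned up later, so bb2 is effectively reached by falling through.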
1486
1487
1489 MachineIRBuilder HelperBuilder(MI);
1490 GISelObserverWrapper DummyObserver;
1491 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1492 return Helper.lowerMemcpyInline(MI) ==
1494}
1495
1497 MachineIRBuilder HelperBuilder(MI);
1498 GISelObserverWrapper DummyObserver;
1499 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1500 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1502}
1503
1505 const MachineRegisterInfo &MRI,
1506 const APFloat &Val) {
1507 APFloat Result(Val);
1508 switch (MI.getOpcode()) {
1509 default:
1510 llvm_unreachable("Unexpected opcode!");
1511 case TargetOpcode::G_FNEG: {
1512 Result.changeSign();
1513 return Result;
1514 }
1515 case TargetOpcode::G_FABS: {
1516 Result.clearSign();
1517 return Result;
1518 }
1519 case TargetOpcode::G_FPTRUNC: {
1520 bool Unused;
1521 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1523 &Unused);
1524 return Result;
1525 }
1526 case TargetOpcode::G_FSQRT: {
1527 bool Unused;
1529 &Unused);
1530 Result = APFloat(sqrt(Result.convertToDouble()));
1531 break;
1532 }
1533 case TargetOpcode::G_FLOG2: {
1534 bool Unused;
1536 &Unused);
1537 Result = APFloat(log2(Result.convertToDouble()));
1538 break;
1539 }
1540 }
1541 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1542 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
1543 // `G_FLOG2` reach here.
1544 bool Unused;
1545 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1546 return Result;
1547}
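// Worked example (illustrative): folding G_FSQRT of a G_FCONSTANT float 4.0
// computes sqrt(4.0) = 2.0 in double precision and then converts the result
// back to the original float semantics before the G_FCONSTANT is rebuilt.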
1548
1550 const ConstantFP *Cst) {
1551 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1552 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1553 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1554 MI.eraseFromParent();
1555}
1556
1558 PtrAddChain &MatchInfo) {
1559 // We're trying to match the following pattern:
1560 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1561 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1562 // -->
1563 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1564
1565 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1566 return false;
1567
1568 Register Add2 = MI.getOperand(1).getReg();
1569 Register Imm1 = MI.getOperand(2).getReg();
1570 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1571 if (!MaybeImmVal)
1572 return false;
1573
1574 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1575 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1576 return false;
1577
1578 Register Base = Add2Def->getOperand(1).getReg();
1579 Register Imm2 = Add2Def->getOperand(2).getReg();
1580 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1581 if (!MaybeImm2Val)
1582 return false;
1583
1584 // Check if the new combined immediate forms an illegal addressing mode.
1585 // Do not combine if it was legal before but would get illegal.
1586 // To do so, we need to find a load/store user of the pointer to get
1587 // the access type.
1588 Type *AccessTy = nullptr;
1589 auto &MF = *MI.getMF();
1590 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1591 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1592 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1593 MF.getFunction().getContext());
1594 break;
1595 }
1596 }
1598 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1599 AMNew.BaseOffs = CombinedImm.getSExtValue();
1600 if (AccessTy) {
1601 AMNew.HasBaseReg = true;
1603 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1604 AMOld.HasBaseReg = true;
1605 unsigned AS = MRI.getType(Add2).getAddressSpace();
1606 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1607 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1608 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1609 return false;
1610 }
1611
1612 // Pass the combined immediate to the apply function.
1613 MatchInfo.Imm = AMNew.BaseOffs;
1614 MatchInfo.Base = Base;
1615 MatchInfo.Bank = getRegBank(Imm2);
1616 return true;
1617}
1618
1620 PtrAddChain &MatchInfo) {
1621 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1622 MachineIRBuilder MIB(MI);
1623 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1624 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1625 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1626 Observer.changingInstr(MI);
1627 MI.getOperand(1).setReg(MatchInfo.Base);
1628 MI.getOperand(2).setReg(NewOffset.getReg(0));
1629 Observer.changedInstr(MI);
1630}
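// Numeric example (illustrative): %t1 = G_PTR_ADD %base, 16 followed by
// %root = G_PTR_ADD %t1, 8 folds to %root = G_PTR_ADD %base, 24, provided the
// combined offset does not turn a previously legal addressing mode for a
// load/store user into an illegal one (checked in the match above).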
1631
1633 RegisterImmPair &MatchInfo) {
1634 // We're trying to match the following pattern with any of
1635 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1636 // %t1 = SHIFT %base, G_CONSTANT imm1
1637 // %root = SHIFT %t1, G_CONSTANT imm2
1638 // -->
1639 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1640
1641 unsigned Opcode = MI.getOpcode();
1642 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1643 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1644 Opcode == TargetOpcode::G_USHLSAT) &&
1645 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1646
1647 Register Shl2 = MI.getOperand(1).getReg();
1648 Register Imm1 = MI.getOperand(2).getReg();
1649 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1650 if (!MaybeImmVal)
1651 return false;
1652
1653 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1654 if (Shl2Def->getOpcode() != Opcode)
1655 return false;
1656
1657 Register Base = Shl2Def->getOperand(1).getReg();
1658 Register Imm2 = Shl2Def->getOperand(2).getReg();
1659 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1660 if (!MaybeImm2Val)
1661 return false;
1662
1663 // Pass the combined immediate to the apply function.
1664 MatchInfo.Imm =
1665 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1666 MatchInfo.Reg = Base;
1667
1668 // There is no simple replacement for a saturating unsigned left shift that
1669 // exceeds the scalar size.
1670 if (Opcode == TargetOpcode::G_USHLSAT &&
1671 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1672 return false;
1673
1674 return true;
1675}
1676
1678 RegisterImmPair &MatchInfo) {
1679 unsigned Opcode = MI.getOpcode();
1680 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1681 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1682 Opcode == TargetOpcode::G_USHLSAT) &&
1683 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1684
1685 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1686 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1687 auto Imm = MatchInfo.Imm;
1688
1689 if (Imm >= ScalarSizeInBits) {
1690 // Any logical shift that exceeds scalar size will produce zero.
1691 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1692 Builder.buildConstant(MI.getOperand(0), 0);
1693 MI.eraseFromParent();
1694 return;
1695 }
1696 // Arithmetic shift and saturating signed left shift have no effect beyond
1697 // scalar size.
1698 Imm = ScalarSizeInBits - 1;
1699 }
1700
1701 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1702 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1703 Observer.changingInstr(MI);
1704 MI.getOperand(1).setReg(MatchInfo.Reg);
1705 MI.getOperand(2).setReg(NewImm);
1706 Observer.changedInstr(MI);
1707}
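// Numeric example (illustrative): for s32, (%x >> 3) >> 2 with G_LSHR folds
// to %x >> 5. If the combined amount reaches or exceeds 32, the logical
// shifts (G_SHL/G_LSHR) are folded directly to the constant 0, while G_ASHR
// and G_SSHLSAT are clamped to a shift amount of 31.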
1708
1710 ShiftOfShiftedLogic &MatchInfo) {
1711 // We're trying to match the following pattern with any of
1712 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1713 // with any of G_AND/G_OR/G_XOR logic instructions.
1714 // %t1 = SHIFT %X, G_CONSTANT C0
1715 // %t2 = LOGIC %t1, %Y
1716 // %root = SHIFT %t2, G_CONSTANT C1
1717 // -->
1718 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1719 // %t4 = SHIFT %Y, G_CONSTANT C1
1720 // %root = LOGIC %t3, %t4
1721 unsigned ShiftOpcode = MI.getOpcode();
1722 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1723 ShiftOpcode == TargetOpcode::G_ASHR ||
1724 ShiftOpcode == TargetOpcode::G_LSHR ||
1725 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1726 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1727 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1728
1729 // Match a one-use bitwise logic op.
1730 Register LogicDest = MI.getOperand(1).getReg();
1731 if (!MRI.hasOneNonDBGUse(LogicDest))
1732 return false;
1733
1734 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1735 unsigned LogicOpcode = LogicMI->getOpcode();
1736 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1737 LogicOpcode != TargetOpcode::G_XOR)
1738 return false;
1739
1740 // Find a matching one-use shift by constant.
1741 const Register C1 = MI.getOperand(2).getReg();
1742 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1743 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1744 return false;
1745
1746 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1747
1748 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1749    // Shift should match the previous one and have a single non-debug use.
1750 if (MI->getOpcode() != ShiftOpcode ||
1751 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1752 return false;
1753
1754 // Must be a constant.
1755 auto MaybeImmVal =
1756 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1757 if (!MaybeImmVal)
1758 return false;
1759
1760 ShiftVal = MaybeImmVal->Value.getSExtValue();
1761 return true;
1762 };
1763
1764 // Logic ops are commutative, so check each operand for a match.
1765 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1766 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1767 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1768 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1769 uint64_t C0Val;
1770
1771 if (matchFirstShift(LogicMIOp1, C0Val)) {
1772 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1773 MatchInfo.Shift2 = LogicMIOp1;
1774 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1775 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1776 MatchInfo.Shift2 = LogicMIOp2;
1777 } else
1778 return false;
1779
1780 MatchInfo.ValSum = C0Val + C1Val;
1781
1782 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1783 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1784 return false;
1785
1786 MatchInfo.Logic = LogicMI;
1787 return true;
1788}
1789
1791 ShiftOfShiftedLogic &MatchInfo) {
1792 unsigned Opcode = MI.getOpcode();
1793 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1794 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1795 Opcode == TargetOpcode::G_SSHLSAT) &&
1796 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1797
1798 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1799 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1800
1801 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1802
1803 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1804 Register Shift1 =
1805 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1806
1807  // If LogicNonShiftReg is the same as Shift1Base and the shift1 constant is
1808  // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
1809  // shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at the
1810  // end would actually remove the reused shift1 and cause a crash later, so
1811  // erase it earlier to avoid that.
1812 MatchInfo.Shift2->eraseFromParent();
1813
1814 Register Shift2Const = MI.getOperand(2).getReg();
1815 Register Shift2 = Builder
1816 .buildInstr(Opcode, {DestType},
1817 {MatchInfo.LogicNonShiftReg, Shift2Const})
1818 .getReg(0);
1819
1820 Register Dest = MI.getOperand(0).getReg();
1821 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1822
1823 // This was one use so it's safe to remove it.
1824 MatchInfo.Logic->eraseFromParent();
1825
1826 MI.eraseFromParent();
1827}
1828
1830 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1831 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1832 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
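  // E.g. (illustrative constants): (shl (add x, 7), 2) -> (add (shl x, 2), 28),
  // since the constant is shifted as well: 7 << 2 == 28.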
1833 auto &Shl = cast<GenericMachineInstr>(MI);
1834 Register DstReg = Shl.getReg(0);
1835 Register SrcReg = Shl.getReg(1);
1836 Register ShiftReg = Shl.getReg(2);
1837 Register X, C1;
1838
1839 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1840 return false;
1841
1842  if (!mi_match(SrcReg, MRI,
1843                m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1844                                        m_GOr(m_Reg(X), m_Reg(C1))))))
1845 return false;
1846
1847 APInt C1Val, C2Val;
1848 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
1849 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
1850 return false;
1851
1852 auto *SrcDef = MRI.getVRegDef(SrcReg);
1853 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
1854 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
1855 LLT SrcTy = MRI.getType(SrcReg);
1856 MatchInfo = [=](MachineIRBuilder &B) {
1857 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
1858 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
1859 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
1860 };
1861 return true;
1862}
1863
1865 unsigned &ShiftVal) {
1866 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1867 auto MaybeImmVal =
1868 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1869 if (!MaybeImmVal)
1870 return false;
1871
1872 ShiftVal = MaybeImmVal->Value.exactLogBase2();
1873 return (static_cast<int32_t>(ShiftVal) != -1);
1874}
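// Illustrative use of the match above: a G_MUL by the power-of-two constant 8
// yields ShiftVal == 3, and the apply below rewrites the multiply in place as
//   %dst = G_SHL %x, G_CONSTANT 3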
1875
1877 unsigned &ShiftVal) {
1878 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1879 MachineIRBuilder MIB(MI);
1880 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1881 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1882  Observer.changingInstr(MI);
1883  MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1884  MI.getOperand(2).setReg(ShiftCst.getReg(0));
1885  Observer.changedInstr(MI);
1886}
1887
1888// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
1890 RegisterImmPair &MatchData) {
1891 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1892 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
1893 return false;
1894
1895 Register LHS = MI.getOperand(1).getReg();
1896
1897 Register ExtSrc;
1898 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1899 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1900 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1901 return false;
1902
1903 Register RHS = MI.getOperand(2).getReg();
1904 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
1905 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
1906 if (!MaybeShiftAmtVal)
1907 return false;
1908
1909 if (LI) {
1910 LLT SrcTy = MRI.getType(ExtSrc);
1911
1912    // We only really care about the legality of the shift on the shifted value.
1913    // We can pick any type for the constant shift amount, so ask the target what
1914    // to use. Otherwise we would have to guess and hope it is reported as legal.
1915 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1916 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1917 return false;
1918 }
1919
1920 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
1921 MatchData.Reg = ExtSrc;
1922 MatchData.Imm = ShiftAmt;
1923
1924 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
1925 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
1926 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
1927}
1928
1930 const RegisterImmPair &MatchData) {
1931 Register ExtSrcReg = MatchData.Reg;
1932 int64_t ShiftAmtVal = MatchData.Imm;
1933
1934 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
1935 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
1936 auto NarrowShift =
1937 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
1938 Builder.buildZExt(MI.getOperand(0), NarrowShift);
1939 MI.eraseFromParent();
1940}
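// Illustrative instance of the combine above (assuming the narrow G_SHL is
// reported legal): with %x:_(s8) whose top four bits are known to be zero,
//   %e:_(s32) = G_ZEXT %x ; %d:_(s32) = G_SHL %e, 2
// becomes
//   %n:_(s8) = G_SHL %x, 2 ; %d:_(s32) = G_ZEXT %n
// which is safe because the shift cannot move any set bit out of the source.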
1941
1943 Register &MatchInfo) {
1944 GMerge &Merge = cast<GMerge>(MI);
1945 SmallVector<Register, 16> MergedValues;
1946 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
1947 MergedValues.emplace_back(Merge.getSourceReg(I));
1948
1949 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
1950 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
1951 return false;
1952
1953 for (unsigned I = 0; I < MergedValues.size(); ++I)
1954 if (MergedValues[I] != Unmerge->getReg(I))
1955 return false;
1956
1957 MatchInfo = Unmerge->getSourceReg();
1958 return true;
1959}
1960
1962 const MachineRegisterInfo &MRI) {
1963 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
1964 ;
1965
1966 return Reg;
1967}
1968
1971 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1972 "Expected an unmerge");
1973 auto &Unmerge = cast<GUnmerge>(MI);
1974 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
1975
1976 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
1977 if (!SrcInstr)
1978 return false;
1979
1980 // Check the source type of the merge.
1981 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
1982 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
1983 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
1984 if (SrcMergeTy != Dst0Ty && !SameSize)
1985 return false;
1986 // They are the same now (modulo a bitcast).
1987 // We can collect all the src registers.
1988 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
1989 Operands.push_back(SrcInstr->getSourceReg(Idx));
1990 return true;
1991}
1992
1995 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1996 "Expected an unmerge");
1997 assert((MI.getNumOperands() - 1 == Operands.size()) &&
1998 "Not enough operands to replace all defs");
1999 unsigned NumElems = MI.getNumOperands() - 1;
2000
2001 LLT SrcTy = MRI.getType(Operands[0]);
2002 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2003 bool CanReuseInputDirectly = DstTy == SrcTy;
2004 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2005 Register DstReg = MI.getOperand(Idx).getReg();
2006 Register SrcReg = Operands[Idx];
2007
2008 // This combine may run after RegBankSelect, so we need to be aware of
2009 // register banks.
2010 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2011 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2012 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2013 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2014 }
2015
2016 if (CanReuseInputDirectly)
2017 replaceRegWith(MRI, DstReg, SrcReg);
2018 else
2019 Builder.buildCast(DstReg, SrcReg);
2020 }
2021 MI.eraseFromParent();
2022}
2023
2025 SmallVectorImpl<APInt> &Csts) {
2026 unsigned SrcIdx = MI.getNumOperands() - 1;
2027 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2028 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2029 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2030 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2031 return false;
2032 // Break down the big constant in smaller ones.
2033 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2034 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2035 ? CstVal.getCImm()->getValue()
2036 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2037
2038 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2039 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2040 // Unmerge a constant.
2041 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2042 Csts.emplace_back(Val.trunc(ShiftAmt));
2043 Val = Val.lshr(ShiftAmt);
2044 }
2045
2046 return true;
2047}
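// Worked example (illustrative): unmerging the s32 constant 0x12345678 into
// four s8 values yields Csts = {0x78, 0x56, 0x34, 0x12}; the first destination
// receives the lowest-order bits.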
2048
2050 SmallVectorImpl<APInt> &Csts) {
2051 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2052 "Expected an unmerge");
2053 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2054 "Not enough operands to replace all defs");
2055 unsigned NumElems = MI.getNumOperands() - 1;
2056 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2057 Register DstReg = MI.getOperand(Idx).getReg();
2058 Builder.buildConstant(DstReg, Csts[Idx]);
2059 }
2060
2061 MI.eraseFromParent();
2062}
2063
2065 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2066 unsigned SrcIdx = MI.getNumOperands() - 1;
2067 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2068 MatchInfo = [&MI](MachineIRBuilder &B) {
2069 unsigned NumElems = MI.getNumOperands() - 1;
2070 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2071 Register DstReg = MI.getOperand(Idx).getReg();
2072 B.buildUndef(DstReg);
2073 }
2074 };
2075 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2076}
2077
2079 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2080 "Expected an unmerge");
2081 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2082 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2083 return false;
2084 // Check that all the lanes are dead except the first one.
2085 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2086 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2087 return false;
2088 }
2089 return true;
2090}
2091
2093 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2094 Register Dst0Reg = MI.getOperand(0).getReg();
2095 Builder.buildTrunc(Dst0Reg, SrcReg);
2096 MI.eraseFromParent();
2097}
2098
2100 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2101 "Expected an unmerge");
2102 Register Dst0Reg = MI.getOperand(0).getReg();
2103 LLT Dst0Ty = MRI.getType(Dst0Reg);
2104 // G_ZEXT on vector applies to each lane, so it will
2105 // affect all destinations. Therefore we won't be able
2106 // to simplify the unmerge to just the first definition.
2107 if (Dst0Ty.isVector())
2108 return false;
2109 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2110 LLT SrcTy = MRI.getType(SrcReg);
2111 if (SrcTy.isVector())
2112 return false;
2113
2114 Register ZExtSrcReg;
2115 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2116 return false;
2117
2118  // Finally, we can replace the first definition with
2119  // a zext of the source if the definition is big enough to hold
2120  // all of ZExtSrc's bits.
2121 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2122 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2123}
2124
2126 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2127 "Expected an unmerge");
2128
2129 Register Dst0Reg = MI.getOperand(0).getReg();
2130
2131 MachineInstr *ZExtInstr =
2132 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2133 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2134 "Expecting a G_ZEXT");
2135
2136 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2137 LLT Dst0Ty = MRI.getType(Dst0Reg);
2138 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2139
2140 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2141 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2142 } else {
2143 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2144 "ZExt src doesn't fit in destination");
2145 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2146 }
2147
2148 Register ZeroReg;
2149 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2150 if (!ZeroReg)
2151 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2152 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2153 }
2154 MI.eraseFromParent();
2155}
2156
2158 unsigned TargetShiftSize,
2159 unsigned &ShiftVal) {
2160 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2161 MI.getOpcode() == TargetOpcode::G_LSHR ||
2162 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2163
2164 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2165 if (Ty.isVector()) // TODO:
2166 return false;
2167
2168 // Don't narrow further than the requested size.
2169 unsigned Size = Ty.getSizeInBits();
2170 if (Size <= TargetShiftSize)
2171 return false;
2172
2173 auto MaybeImmVal =
2174 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2175 if (!MaybeImmVal)
2176 return false;
2177
2178 ShiftVal = MaybeImmVal->Value.getSExtValue();
2179 return ShiftVal >= Size / 2 && ShiftVal < Size;
2180}
2181
2183 const unsigned &ShiftVal) {
2184 Register DstReg = MI.getOperand(0).getReg();
2185 Register SrcReg = MI.getOperand(1).getReg();
2186 LLT Ty = MRI.getType(SrcReg);
2187 unsigned Size = Ty.getSizeInBits();
2188 unsigned HalfSize = Size / 2;
2189 assert(ShiftVal >= HalfSize);
2190
2191 LLT HalfTy = LLT::scalar(HalfSize);
2192
2193 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2194 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2195
2196 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2197 Register Narrowed = Unmerge.getReg(1);
2198
2199 // dst = G_LSHR s64:x, C for C >= 32
2200 // =>
2201 // lo, hi = G_UNMERGE_VALUES x
2202 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2203
2204 if (NarrowShiftAmt != 0) {
2205 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2206 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2207 }
2208
2209 auto Zero = Builder.buildConstant(HalfTy, 0);
2210 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2211 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2212 Register Narrowed = Unmerge.getReg(0);
2213 // dst = G_SHL s64:x, C for C >= 32
2214 // =>
2215 // lo, hi = G_UNMERGE_VALUES x
2216 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2217 if (NarrowShiftAmt != 0) {
2218 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2219 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2220 }
2221
2222 auto Zero = Builder.buildConstant(HalfTy, 0);
2223 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2224 } else {
2225 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2226 auto Hi = Builder.buildAShr(
2227 HalfTy, Unmerge.getReg(1),
2228 Builder.buildConstant(HalfTy, HalfSize - 1));
2229
2230 if (ShiftVal == HalfSize) {
2231 // (G_ASHR i64:x, 32) ->
2232 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2233 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2234 } else if (ShiftVal == Size - 1) {
2235 // Don't need a second shift.
2236 // (G_ASHR i64:x, 63) ->
2237 // %narrowed = (G_ASHR hi_32(x), 31)
2238 // G_MERGE_VALUES %narrowed, %narrowed
2239 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2240 } else {
2241 auto Lo = Builder.buildAShr(
2242 HalfTy, Unmerge.getReg(1),
2243 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2244
2245 // (G_ASHR i64:x, C) ->, for C >= 32
2246 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2247 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2248 }
2249 }
2250
2251 MI.eraseFromParent();
2252}
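// Illustrative instance of the rewrite above (s64 split into 2 x s32):
//   %d:_(s64) = G_LSHR %x, 40
// becomes
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
//   %n:_(s32) = G_LSHR %hi, 8
//   %d:_(s64) = G_MERGE_VALUES %n, 0
// i.e. only the high half is actually shifted.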
2253
2255 unsigned TargetShiftAmount) {
2256 unsigned ShiftAmt;
2257 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2258 applyCombineShiftToUnmerge(MI, ShiftAmt);
2259 return true;
2260 }
2261
2262 return false;
2263}
2264
2266 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2267 Register DstReg = MI.getOperand(0).getReg();
2268 LLT DstTy = MRI.getType(DstReg);
2269 Register SrcReg = MI.getOperand(1).getReg();
2270 return mi_match(SrcReg, MRI,
2271 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2272}
2273
2275 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2276 Register DstReg = MI.getOperand(0).getReg();
2277 Builder.buildCopy(DstReg, Reg);
2278 MI.eraseFromParent();
2279}
2280
2282 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2283 Register DstReg = MI.getOperand(0).getReg();
2284 Builder.buildZExtOrTrunc(DstReg, Reg);
2285 MI.eraseFromParent();
2286}
2287
2289 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2290 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2291 Register LHS = MI.getOperand(1).getReg();
2292 Register RHS = MI.getOperand(2).getReg();
2293 LLT IntTy = MRI.getType(LHS);
2294
2295 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2296 // instruction.
2297 PtrReg.second = false;
2298 for (Register SrcReg : {LHS, RHS}) {
2299 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2300 // Don't handle cases where the integer is implicitly converted to the
2301 // pointer width.
2302 LLT PtrTy = MRI.getType(PtrReg.first);
2303 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2304 return true;
2305 }
2306
2307 PtrReg.second = true;
2308 }
2309
2310 return false;
2311}
2312
2314 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2315 Register Dst = MI.getOperand(0).getReg();
2316 Register LHS = MI.getOperand(1).getReg();
2317 Register RHS = MI.getOperand(2).getReg();
2318
2319 const bool DoCommute = PtrReg.second;
2320 if (DoCommute)
2321 std::swap(LHS, RHS);
2322 LHS = PtrReg.first;
2323
2324 LLT PtrTy = MRI.getType(LHS);
2325
2326 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2327 Builder.buildPtrToInt(Dst, PtrAdd);
2328 MI.eraseFromParent();
2329}
2330
2332 APInt &NewCst) {
2333 auto &PtrAdd = cast<GPtrAdd>(MI);
2334 Register LHS = PtrAdd.getBaseReg();
2335 Register RHS = PtrAdd.getOffsetReg();
2337
2338 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2339 APInt Cst;
2340 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2341 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2342 // G_INTTOPTR uses zero-extension
2343 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2344 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2345 return true;
2346 }
2347 }
2348
2349 return false;
2350}
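// Illustrative: if the base is G_INTTOPTR of the constant 0x1000 and the
// offset is the constant 16, NewCst becomes 0x1010 and the apply below
// replaces the whole G_PTR_ADD with that constant.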
2351
2353 APInt &NewCst) {
2354 auto &PtrAdd = cast<GPtrAdd>(MI);
2355 Register Dst = PtrAdd.getReg(0);
2356
2357 Builder.buildConstant(Dst, NewCst);
2358 PtrAdd.eraseFromParent();
2359}
2360
2362 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2363 Register DstReg = MI.getOperand(0).getReg();
2364 Register SrcReg = MI.getOperand(1).getReg();
2365 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2366 if (OriginalSrcReg.isValid())
2367 SrcReg = OriginalSrcReg;
2368 LLT DstTy = MRI.getType(DstReg);
2369 return mi_match(SrcReg, MRI,
2370 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2371}
2372
2374 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2375 Register DstReg = MI.getOperand(0).getReg();
2376 Register SrcReg = MI.getOperand(1).getReg();
2377 LLT DstTy = MRI.getType(DstReg);
2378 if (mi_match(SrcReg, MRI,
2379 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2380 unsigned DstSize = DstTy.getScalarSizeInBits();
2381 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2382 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2383 }
2384 return false;
2385}
2386
2388 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2389 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2390 MI.getOpcode() == TargetOpcode::G_SEXT ||
2391 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2392 "Expected a G_[ASZ]EXT");
2393 Register SrcReg = MI.getOperand(1).getReg();
2394 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2395 if (OriginalSrcReg.isValid())
2396 SrcReg = OriginalSrcReg;
2397 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2398 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2399 unsigned Opc = MI.getOpcode();
2400 unsigned SrcOpc = SrcMI->getOpcode();
2401 if (Opc == SrcOpc ||
2402 (Opc == TargetOpcode::G_ANYEXT &&
2403 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2404 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2405 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2406 return true;
2407 }
2408 return false;
2409}
2410
2412 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2413 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2414 MI.getOpcode() == TargetOpcode::G_SEXT ||
2415 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2416 "Expected a G_[ASZ]EXT");
2417
2418 Register Reg = std::get<0>(MatchInfo);
2419 unsigned SrcExtOp = std::get<1>(MatchInfo);
2420
2421 // Combine exts with the same opcode.
2422  if (MI.getOpcode() == SrcExtOp) {
2423    Observer.changingInstr(MI);
2424    MI.getOperand(1).setReg(Reg);
2425    Observer.changedInstr(MI);
2426    return;
2427 }
2428
2429 // Combine:
2430 // - anyext([sz]ext x) to [sz]ext x
2431 // - sext(zext x) to zext x
2432 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2433 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2434 SrcExtOp == TargetOpcode::G_ZEXT)) {
2435 Register DstReg = MI.getOperand(0).getReg();
2436 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2437 MI.eraseFromParent();
2438 }
2439}
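// E.g. (illustrative types): %a:_(s64) = G_ANYEXT of (%b:_(s32) = G_ZEXT %c:_(s16))
// folds to %a:_(s64) = G_ZEXT %c, while an ext of an ext with the same opcode
// simply reuses the outer instruction on the inner source.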
2440
2442 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2443 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2444 Register SrcReg = MI.getOperand(1).getReg();
2445 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2446 unsigned SrcOpc = SrcMI->getOpcode();
2447 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2448 SrcOpc == TargetOpcode::G_ZEXT) {
2449 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2450 return true;
2451 }
2452 return false;
2453}
2454
2456 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2457 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2458 Register SrcReg = MatchInfo.first;
2459 unsigned SrcExtOp = MatchInfo.second;
2460 Register DstReg = MI.getOperand(0).getReg();
2461 LLT SrcTy = MRI.getType(SrcReg);
2462 LLT DstTy = MRI.getType(DstReg);
2463 if (SrcTy == DstTy) {
2464 MI.eraseFromParent();
2465 replaceRegWith(MRI, DstReg, SrcReg);
2466 return;
2467 }
2468 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2469 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2470 else
2471 Builder.buildTrunc(DstReg, SrcReg);
2472 MI.eraseFromParent();
2473}
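// Illustrative cases for the trunc(ext) fold above:
//   %d:_(s16) = G_TRUNC (G_ZEXT %x:_(s8))  -->  %d = G_ZEXT %x    (ext source narrower)
//   %d:_(s8)  = G_TRUNC (G_ZEXT %x:_(s8))  -->  %d replaced by %x (same size)
//   %d:_(s8)  = G_TRUNC (G_ZEXT %x:_(s16)) -->  %d = G_TRUNC %x   (ext source wider)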
2474
2476 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2477 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2478
2479 // ShiftTy > 32 > TruncTy -> 32
2480 if (ShiftSize > 32 && TruncSize < 32)
2481 return ShiftTy.changeElementSize(32);
2482
2483 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2484 // Some targets like it, some don't, some only like it under certain
2485 // conditions/processor versions, etc.
2486 // A TL hook might be needed for this.
2487
2488 // Don't combine
2489 return ShiftTy;
2490}
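// E.g. (illustrative): an s64 right shift whose result is truncated to s16 is
// re-done at s32, while an s64 shift truncated to s32 is left unchanged.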
2491
2493 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2494 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2495 Register DstReg = MI.getOperand(0).getReg();
2496 Register SrcReg = MI.getOperand(1).getReg();
2497
2498 if (!MRI.hasOneNonDBGUse(SrcReg))
2499 return false;
2500
2501 LLT SrcTy = MRI.getType(SrcReg);
2502 LLT DstTy = MRI.getType(DstReg);
2503
2504 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2505 const auto &TL = getTargetLowering();
2506
2507 LLT NewShiftTy;
2508 switch (SrcMI->getOpcode()) {
2509 default:
2510 return false;
2511 case TargetOpcode::G_SHL: {
2512 NewShiftTy = DstTy;
2513
2514 // Make sure new shift amount is legal.
2515 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2516 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2517 return false;
2518 break;
2519 }
2520 case TargetOpcode::G_LSHR:
2521 case TargetOpcode::G_ASHR: {
2522 // For right shifts, we conservatively do not do the transform if the TRUNC
2523 // has any STORE users. The reason is that if we change the type of the
2524 // shift, we may break the truncstore combine.
2525 //
2526 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2527 for (auto &User : MRI.use_instructions(DstReg))
2528 if (User.getOpcode() == TargetOpcode::G_STORE)
2529 return false;
2530
2531 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2532 if (NewShiftTy == SrcTy)
2533 return false;
2534
2535 // Make sure we won't lose information by truncating the high bits.
2536 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2537 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2538 DstTy.getScalarSizeInBits()))
2539 return false;
2540 break;
2541 }
2542 }
2543
2544  if (!isLegalOrBeforeLegalizer(
2545          {SrcMI->getOpcode(),
2546           {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2547 return false;
2548
2549 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2550 return true;
2551}
2552
2554 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2555 MachineInstr *ShiftMI = MatchInfo.first;
2556 LLT NewShiftTy = MatchInfo.second;
2557
2558 Register Dst = MI.getOperand(0).getReg();
2559 LLT DstTy = MRI.getType(Dst);
2560
2561 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2562 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2563 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2564
2565 Register NewShift =
2566 Builder
2567 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2568 .getReg(0);
2569
2570 if (NewShiftTy == DstTy)
2571 replaceRegWith(MRI, Dst, NewShift);
2572 else
2573 Builder.buildTrunc(Dst, NewShift);
2574
2575 eraseInst(MI);
2576}
2577
2579 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2580 return MO.isReg() &&
2581 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2582 });
2583}
2584
2586 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2587 return !MO.isReg() ||
2588 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2589 });
2590}
2591
2593 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2594 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2595 return all_of(Mask, [](int Elt) { return Elt < 0; });
2596}
2597
2599 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2600 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2601 MRI);
2602}
2603
2605 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2606 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2607 MRI);
2608}
2609
2611 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2612 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2613 "Expected an insert/extract element op");
2614 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2615 unsigned IdxIdx =
2616 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2617 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2618 if (!Idx)
2619 return false;
2620 return Idx->getZExtValue() >= VecTy.getNumElements();
2621}
2622
2624 GSelect &SelMI = cast<GSelect>(MI);
2625  auto Cst =
2626      isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2627  if (!Cst)
2628 return false;
2629 OpIdx = Cst->isZero() ? 3 : 2;
2630 return true;
2631}
2632
2633void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2634
2636 const MachineOperand &MOP2) {
2637 if (!MOP1.isReg() || !MOP2.isReg())
2638 return false;
2639 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2640 if (!InstAndDef1)
2641 return false;
2642 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2643 if (!InstAndDef2)
2644 return false;
2645 MachineInstr *I1 = InstAndDef1->MI;
2646 MachineInstr *I2 = InstAndDef2->MI;
2647
2648 // Handle a case like this:
2649 //
2650 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2651 //
2652 // Even though %0 and %1 are produced by the same instruction they are not
2653 // the same values.
2654 if (I1 == I2)
2655 return MOP1.getReg() == MOP2.getReg();
2656
2657 // If we have an instruction which loads or stores, we can't guarantee that
2658 // it is identical.
2659 //
2660 // For example, we may have
2661 //
2662 // %x1 = G_LOAD %addr (load N from @somewhere)
2663 // ...
2664 // call @foo
2665 // ...
2666 // %x2 = G_LOAD %addr (load N from @somewhere)
2667 // ...
2668 // %or = G_OR %x1, %x2
2669 //
2670 // It's possible that @foo will modify whatever lives at the address we're
2671 // loading from. To be safe, let's just assume that all loads and stores
2672 // are different (unless we have something which is guaranteed to not
2673 // change.)
2674 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2675 return false;
2676
2677 // If both instructions are loads or stores, they are equal only if both
2678 // are dereferenceable invariant loads with the same number of bits.
2679 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2680 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2681 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2682 if (!LS1 || !LS2)
2683 return false;
2684
2685 if (!I2->isDereferenceableInvariantLoad() ||
2686 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2687 return false;
2688 }
2689
2690 // Check for physical registers on the instructions first to avoid cases
2691 // like this:
2692 //
2693 // %a = COPY $physreg
2694 // ...
2695 // SOMETHING implicit-def $physreg
2696 // ...
2697 // %b = COPY $physreg
2698 //
2699 // These copies are not equivalent.
2700 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2701 return MO.isReg() && MO.getReg().isPhysical();
2702 })) {
2703 // Check if we have a case like this:
2704 //
2705 // %a = COPY $physreg
2706 // %b = COPY %a
2707 //
2708 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2709 // From that, we know that they must have the same value, since they must
2710 // have come from the same COPY.
2711 return I1->isIdenticalTo(*I2);
2712 }
2713
2714 // We don't have any physical registers, so we don't necessarily need the
2715 // same vreg defs.
2716 //
2717 // On the off-chance that there's some target instruction feeding into the
2718 // instruction, let's use produceSameValue instead of isIdenticalTo.
2719 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2720    // Handle instructions with multiple defs that produce the same values. The
2721    // values are the same for operands with the same index.
2722 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2723 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2724    // I1 and I2 are different instructions but produce the same values, so
2725    // %1 and %6 are the same, while %1 and %7 are not.
2726 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
2727 I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
2728 }
2729 return false;
2730}
2731
2733 if (!MOP.isReg())
2734 return false;
2735 auto *MI = MRI.getVRegDef(MOP.getReg());
2736 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2737 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2738 MaybeCst->getSExtValue() == C;
2739}
2740
2742 if (!MOP.isReg())
2743 return false;
2744 std::optional<FPValueAndVReg> MaybeCst;
2745 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2746 return false;
2747
2748 return MaybeCst->Value.isExactlyValue(C);
2749}
2750
2752 unsigned OpIdx) {
2753 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2754 Register OldReg = MI.getOperand(0).getReg();
2755 Register Replacement = MI.getOperand(OpIdx).getReg();
2756 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2757 MI.eraseFromParent();
2758 replaceRegWith(MRI, OldReg, Replacement);
2759}
2760
2762 Register Replacement) {
2763 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2764 Register OldReg = MI.getOperand(0).getReg();
2765 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2766 MI.eraseFromParent();
2767 replaceRegWith(MRI, OldReg, Replacement);
2768}
2769
2771 unsigned ConstIdx) {
2772 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2773 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2774
2775 // Get the shift amount
2776 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2777 if (!VRegAndVal)
2778 return false;
2779
2780  // Return true if the shift amount is >= the bit width.
2781 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2782}
2783
2785 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2786 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2787 "This is not a funnel shift operation");
2788
2789 Register ConstReg = MI.getOperand(3).getReg();
2790 LLT ConstTy = MRI.getType(ConstReg);
2791 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2792
2793 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2794 assert((VRegAndVal) && "Value is not a constant");
2795
2796 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
2797 APInt NewConst = VRegAndVal->Value.urem(
2798 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2799
2800  auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2801  Builder.buildInstr(
2802      MI.getOpcode(), {MI.getOperand(0)},
2803 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2804
2805 MI.eraseFromParent();
2806}
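// Illustrative: a G_FSHL of s32 values with a constant shift amount of 37 is
// rewritten to use the equivalent amount 37 % 32 == 5.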
2807
2809 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2810 // Match (cond ? x : x)
2811 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2812 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2813 MRI);
2814}
2815
2817 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2818 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2819 MRI);
2820}
2821
2823 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2824 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2825 MRI);
2826}
2827
2829 MachineOperand &MO = MI.getOperand(OpIdx);
2830 return MO.isReg() &&
2831 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2832}
2833
2835 unsigned OpIdx) {
2836 MachineOperand &MO = MI.getOperand(OpIdx);
2837 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2838}
2839
2841 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2842 Builder.buildFConstant(MI.getOperand(0), C);
2843 MI.eraseFromParent();
2844}
2845
2847 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2848 Builder.buildConstant(MI.getOperand(0), C);
2849 MI.eraseFromParent();
2850}
2851
2853 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2854 Builder.buildConstant(MI.getOperand(0), C);
2855 MI.eraseFromParent();
2856}
2857
2859 ConstantFP *CFP) {
2860 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2861 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
2862 MI.eraseFromParent();
2863}
2864
2866 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2867 Builder.buildUndef(MI.getOperand(0));
2868 MI.eraseFromParent();
2869}
2870
2872 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2873 Register LHS = MI.getOperand(1).getReg();
2874 Register RHS = MI.getOperand(2).getReg();
2875 Register &NewLHS = std::get<0>(MatchInfo);
2876 Register &NewRHS = std::get<1>(MatchInfo);
2877
2878 // Helper lambda to check for opportunities for
2879 // ((0-A) + B) -> B - A
2880 // (A + (0-B)) -> A - B
2881 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2882 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2883 return false;
2884 NewLHS = MaybeNewLHS;
2885 return true;
2886 };
2887
2888 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2889}
2890
2893 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2894 "Invalid opcode");
2895 Register DstReg = MI.getOperand(0).getReg();
2896 LLT DstTy = MRI.getType(DstReg);
2897 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2898 unsigned NumElts = DstTy.getNumElements();
2899 // If this MI is part of a sequence of insert_vec_elts, then
2900 // don't do the combine in the middle of the sequence.
2901 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2902 TargetOpcode::G_INSERT_VECTOR_ELT)
2903 return false;
2904 MachineInstr *CurrInst = &MI;
2905 MachineInstr *TmpInst;
2906 int64_t IntImm;
2907 Register TmpReg;
2908 MatchInfo.resize(NumElts);
2909 while (mi_match(
2910 CurrInst->getOperand(0).getReg(), MRI,
2911 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2912 if (IntImm >= NumElts || IntImm < 0)
2913 return false;
2914 if (!MatchInfo[IntImm])
2915 MatchInfo[IntImm] = TmpReg;
2916 CurrInst = TmpInst;
2917 }
2918 // Variable index.
2919 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2920 return false;
2921 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2922 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
2923 if (!MatchInfo[I - 1].isValid())
2924 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
2925 }
2926 return true;
2927 }
2928 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
2929 // overwritten, bail out.
2930 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
2931 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
2932}
2933
2936 Register UndefReg;
2937 auto GetUndef = [&]() {
2938 if (UndefReg)
2939 return UndefReg;
2940 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2941 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
2942 return UndefReg;
2943 };
2944 for (unsigned I = 0; I < MatchInfo.size(); ++I) {
2945 if (!MatchInfo[I])
2946 MatchInfo[I] = GetUndef();
2947 }
2948 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
2949 MI.eraseFromParent();
2950}
2951
2953 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2954 Register SubLHS, SubRHS;
2955 std::tie(SubLHS, SubRHS) = MatchInfo;
2956 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
2957 MI.eraseFromParent();
2958}
2959
2962 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
2963 //
2964  // Creates the new hand + logic instructions (but does not insert them).
2965 //
2966 // On success, MatchInfo is populated with the new instructions. These are
2967 // inserted in applyHoistLogicOpWithSameOpcodeHands.
2968 unsigned LogicOpcode = MI.getOpcode();
2969 assert(LogicOpcode == TargetOpcode::G_AND ||
2970 LogicOpcode == TargetOpcode::G_OR ||
2971 LogicOpcode == TargetOpcode::G_XOR);
2972 MachineIRBuilder MIB(MI);
2973 Register Dst = MI.getOperand(0).getReg();
2974 Register LHSReg = MI.getOperand(1).getReg();
2975 Register RHSReg = MI.getOperand(2).getReg();
2976
2977 // Don't recompute anything.
2978 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
2979 return false;
2980
2981 // Make sure we have (hand x, ...), (hand y, ...)
2982 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
2983 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
2984 if (!LeftHandInst || !RightHandInst)
2985 return false;
2986 unsigned HandOpcode = LeftHandInst->getOpcode();
2987 if (HandOpcode != RightHandInst->getOpcode())
2988 return false;
2989 if (!LeftHandInst->getOperand(1).isReg() ||
2990 !RightHandInst->getOperand(1).isReg())
2991 return false;
2992
2993 // Make sure the types match up, and if we're doing this post-legalization,
2994 // we end up with legal types.
2995 Register X = LeftHandInst->getOperand(1).getReg();
2996 Register Y = RightHandInst->getOperand(1).getReg();
2997 LLT XTy = MRI.getType(X);
2998 LLT YTy = MRI.getType(Y);
2999 if (!XTy.isValid() || XTy != YTy)
3000 return false;
3001
3002 // Optional extra source register.
3003 Register ExtraHandOpSrcReg;
3004 switch (HandOpcode) {
3005 default:
3006 return false;
3007 case TargetOpcode::G_ANYEXT:
3008 case TargetOpcode::G_SEXT:
3009 case TargetOpcode::G_ZEXT: {
3010 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3011 break;
3012 }
3013 case TargetOpcode::G_AND:
3014 case TargetOpcode::G_ASHR:
3015 case TargetOpcode::G_LSHR:
3016 case TargetOpcode::G_SHL: {
3017 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3018 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3019 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3020 return false;
3021 ExtraHandOpSrcReg = ZOp.getReg();
3022 break;
3023 }
3024 }
3025
3026 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3027 return false;
3028
3029 // Record the steps to build the new instructions.
3030 //
3031 // Steps to build (logic x, y)
3032 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3033 OperandBuildSteps LogicBuildSteps = {
3034 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3035 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3036 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3037 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3038
3039 // Steps to build hand (logic x, y), ...z
3040 OperandBuildSteps HandBuildSteps = {
3041 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3042 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3043 if (ExtraHandOpSrcReg.isValid())
3044 HandBuildSteps.push_back(
3045 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3046 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3047
3048 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3049 return true;
3050}
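// Illustrative instance of the hoisting match above:
//   %l = G_LSHR %x, %z ; %r = G_LSHR %y, %z ; %d = G_AND %l, %r
// becomes
//   %a = G_AND %x, %y ; %d = G_LSHR %a, %z
// provided each G_LSHR has a single use and G_AND is legal for the type.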
3051
3054 assert(MatchInfo.InstrsToBuild.size() &&
3055 "Expected at least one instr to build?");
3056 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3057 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3058 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3059 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3060 for (auto &OperandFn : InstrToBuild.OperandFns)
3061 OperandFn(Instr);
3062 }
3063 MI.eraseFromParent();
3064}
3065
3067 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3068 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3069 int64_t ShlCst, AshrCst;
3070 Register Src;
3071 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3072 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3073 m_ICstOrSplat(AshrCst))))
3074 return false;
3075 if (ShlCst != AshrCst)
3076    return false;
3077  if (!isLegalOrBeforeLegalizer(
3078          {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3079 return false;
3080 MatchInfo = std::make_tuple(Src, ShlCst);
3081 return true;
3082}
3083
3085 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3086 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3087 Register Src;
3088 int64_t ShiftAmt;
3089 std::tie(Src, ShiftAmt) = MatchInfo;
3090 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3091 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3092 MI.eraseFromParent();
3093}
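// E.g. (illustrative, s32): G_ASHR (G_SHL %x, 24), 24 becomes
// G_SEXT_INREG %x, 8, i.e. a sign extension from the low 8 bits.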
3094
3095/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3097 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3098 assert(MI.getOpcode() == TargetOpcode::G_AND);
3099
3100 Register Dst = MI.getOperand(0).getReg();
3101 LLT Ty = MRI.getType(Dst);
3102
3103 Register R;
3104 int64_t C1;
3105 int64_t C2;
3106 if (!mi_match(
3107 Dst, MRI,
3108 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3109 return false;
3110
3111 MatchInfo = [=](MachineIRBuilder &B) {
3112 if (C1 & C2) {
3113 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3114 return;
3115 }
3116 auto Zero = B.buildConstant(Ty, 0);
3117 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3118 };
3119 return true;
3120}
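// Worked example (illustrative): and(and(x, 0x0F), 0x3C) -> and(x, 0x0C),
// while and(and(x, 0xF0), 0x0F) folds directly to the constant 0.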
3121
3123 Register &Replacement) {
3124 // Given
3125 //
3126 // %y:_(sN) = G_SOMETHING
3127 // %x:_(sN) = G_SOMETHING
3128 // %res:_(sN) = G_AND %x, %y
3129 //
3130 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3131 //
3132 // Patterns like this can appear as a result of legalization. E.g.
3133 //
3134 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3135 // %one:_(s32) = G_CONSTANT i32 1
3136 // %and:_(s32) = G_AND %cmp, %one
3137 //
3138 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3139 assert(MI.getOpcode() == TargetOpcode::G_AND);
3140 if (!KB)
3141 return false;
3142
3143 Register AndDst = MI.getOperand(0).getReg();
3144 Register LHS = MI.getOperand(1).getReg();
3145 Register RHS = MI.getOperand(2).getReg();
3146 KnownBits LHSBits = KB->getKnownBits(LHS);
3147 KnownBits RHSBits = KB->getKnownBits(RHS);
3148
3149 // Check that x & Mask == x.
3150 // x & 1 == x, always
3151 // x & 0 == x, only if x is also 0
3152 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3153 //
3154 // Check if we can replace AndDst with the LHS of the G_AND
3155 if (canReplaceReg(AndDst, LHS, MRI) &&
3156 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3157 Replacement = LHS;
3158 return true;
3159 }
3160
3161 // Check if we can replace AndDst with the RHS of the G_AND
3162 if (canReplaceReg(AndDst, RHS, MRI) &&
3163 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3164 Replacement = RHS;
3165 return true;
3166 }
3167
3168 return false;
3169}
3170
3172 // Given
3173 //
3174 // %y:_(sN) = G_SOMETHING
3175 // %x:_(sN) = G_SOMETHING
3176 // %res:_(sN) = G_OR %x, %y
3177 //
3178 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3179 assert(MI.getOpcode() == TargetOpcode::G_OR);
3180 if (!KB)
3181 return false;
3182
3183 Register OrDst = MI.getOperand(0).getReg();
3184 Register LHS = MI.getOperand(1).getReg();
3185 Register RHS = MI.getOperand(2).getReg();
3186 KnownBits LHSBits = KB->getKnownBits(LHS);
3187 KnownBits RHSBits = KB->getKnownBits(RHS);
3188
3189 // Check that x | Mask == x.
3190 // x | 0 == x, always
3191 // x | 1 == x, only if x is also 1
3192 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3193 //
3194 // Check if we can replace OrDst with the LHS of the G_OR
3195 if (canReplaceReg(OrDst, LHS, MRI) &&
3196 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3197 Replacement = LHS;
3198 return true;
3199 }
3200
3201 // Check if we can replace OrDst with the RHS of the G_OR
3202 if (canReplaceReg(OrDst, RHS, MRI) &&
3203 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3204 Replacement = RHS;
3205 return true;
3206 }
3207
3208 return false;
3209}
3210
3212 // If the input is already sign extended, just drop the extension.
3213 Register Src = MI.getOperand(1).getReg();
3214 unsigned ExtBits = MI.getOperand(2).getImm();
3215 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3216 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3217}
3218
3219static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3220 int64_t Cst, bool IsVector, bool IsFP) {
3221 // For i1, Cst will always be -1 regardless of boolean contents.
3222 return (ScalarSizeBits == 1 && Cst == -1) ||
3223 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3224}
3225
3227 SmallVectorImpl<Register> &RegsToNegate) {
3228 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3229 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3230 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3231 Register XorSrc;
3232 Register CstReg;
3233 // We match xor(src, true) here.
3234 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3235 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3236 return false;
3237
3238 if (!MRI.hasOneNonDBGUse(XorSrc))
3239 return false;
3240
3241 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3242  // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3243 // list of tree nodes to visit.
3244 RegsToNegate.push_back(XorSrc);
3245 // Remember whether the comparisons are all integer or all floating point.
3246 bool IsInt = false;
3247 bool IsFP = false;
3248 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3249 Register Reg = RegsToNegate[I];
3250 if (!MRI.hasOneNonDBGUse(Reg))
3251 return false;
3252 MachineInstr *Def = MRI.getVRegDef(Reg);
3253 switch (Def->getOpcode()) {
3254 default:
3255 // Don't match if the tree contains anything other than ANDs, ORs and
3256 // comparisons.
3257 return false;
3258 case TargetOpcode::G_ICMP:
3259 if (IsFP)
3260 return false;
3261 IsInt = true;
3262 // When we apply the combine we will invert the predicate.
3263 break;
3264 case TargetOpcode::G_FCMP:
3265 if (IsInt)
3266 return false;
3267 IsFP = true;
3268 // When we apply the combine we will invert the predicate.
3269 break;
3270 case TargetOpcode::G_AND:
3271 case TargetOpcode::G_OR:
3272 // Implement De Morgan's laws:
3273 // ~(x & y) -> ~x | ~y
3274 // ~(x | y) -> ~x & ~y
3275 // When we apply the combine we will change the opcode and recursively
3276 // negate the operands.
3277 RegsToNegate.push_back(Def->getOperand(1).getReg());
3278 RegsToNegate.push_back(Def->getOperand(2).getReg());
3279 break;
3280 }
3281 }
3282
3283 // Now we know whether the comparisons are integer or floating point, check
3284 // the constant in the xor.
3285 int64_t Cst;
3286 if (Ty.isVector()) {
3287 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3288 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3289 if (!MaybeCst)
3290 return false;
3291 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3292 return false;
3293 } else {
3294 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3295 return false;
3296 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3297 return false;
3298 }
3299
3300 return true;
3301}
3302
3304 SmallVectorImpl<Register> &RegsToNegate) {
3305 for (Register Reg : RegsToNegate) {
3306 MachineInstr *Def = MRI.getVRegDef(Reg);
3307 Observer.changingInstr(*Def);
3308    // For each comparison, invert the predicate. For each AND and OR, swap the
3309    // opcode (G_AND <-> G_OR).
3310 switch (Def->getOpcode()) {
3311 default:
3312 llvm_unreachable("Unexpected opcode");
3313 case TargetOpcode::G_ICMP:
3314 case TargetOpcode::G_FCMP: {
3315      MachineOperand &PredOp = Def->getOperand(1);
3316      CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3317          (CmpInst::Predicate)PredOp.getPredicate());
3318      PredOp.setPredicate(NewP);
3319 break;
3320 }
3321 case TargetOpcode::G_AND:
3322 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3323 break;
3324 case TargetOpcode::G_OR:
3325 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3326 break;
3327 }
3328 Observer.changedInstr(*Def);
3329 }
3330
3331 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3332 MI.eraseFromParent();
3333}
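// Illustrative instance of the De Morgan rewrite above:
//   %n = G_XOR (G_AND (G_ICMP eq, %a, %b), (G_ICMP slt, %c, %d)), -1
// becomes
//   %n = G_OR (G_ICMP ne, %a, %b), (G_ICMP sge, %c, %d)
// with every predicate inverted and the G_AND turned into a G_OR.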
3334
3336 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3337 // Match (xor (and x, y), y) (or any of its commuted cases)
3338 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3339 Register &X = MatchInfo.first;
3340 Register &Y = MatchInfo.second;
3341 Register AndReg = MI.getOperand(1).getReg();
3342 Register SharedReg = MI.getOperand(2).getReg();
3343
3344 // Find a G_AND on either side of the G_XOR.
3345 // Look for one of
3346 //
3347 // (xor (and x, y), SharedReg)
3348 // (xor SharedReg, (and x, y))
3349 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3350 std::swap(AndReg, SharedReg);
3351 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3352 return false;
3353 }
3354
3355 // Only do this if we'll eliminate the G_AND.
3356 if (!MRI.hasOneNonDBGUse(AndReg))
3357 return false;
3358
3359 // We can combine if SharedReg is the same as either the LHS or RHS of the
3360 // G_AND.
3361 if (Y != SharedReg)
3362 std::swap(X, Y);
3363 return Y == SharedReg;
3364}
3365
3367 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3368 // Fold (xor (and x, y), y) -> (and (not x), y)
3369 Register X, Y;
3370 std::tie(X, Y) = MatchInfo;
3371 auto Not = Builder.buildNot(MRI.getType(X), X);
3372  Observer.changingInstr(MI);
3373  MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3374  MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3375  MI.getOperand(2).setReg(Y);
3376  Observer.changedInstr(MI);
3377}
3378
3380 auto &PtrAdd = cast<GPtrAdd>(MI);
3381 Register DstReg = PtrAdd.getReg(0);
3382  LLT Ty = MRI.getType(DstReg);
3383  const DataLayout &DL = Builder.getMF().getDataLayout();
3384
3385 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3386 return false;
3387
3388 if (Ty.isPointer()) {
3389 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3390 return ConstVal && *ConstVal == 0;
3391 }
3392
3393 assert(Ty.isVector() && "Expecting a vector type");
3394 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3395 return isBuildVectorAllZeros(*VecMI, MRI);
3396}
3397
3399 auto &PtrAdd = cast<GPtrAdd>(MI);
3400 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3401 PtrAdd.eraseFromParent();
3402}
3403
3404/// The second source operand is known to be a power of 2.
3406 Register DstReg = MI.getOperand(0).getReg();
3407 Register Src0 = MI.getOperand(1).getReg();
3408 Register Pow2Src1 = MI.getOperand(2).getReg();
3409 LLT Ty = MRI.getType(DstReg);
3410
3411 // Fold (urem x, pow2) -> (and x, pow2-1)
3412 auto NegOne = Builder.buildConstant(Ty, -1);
3413 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3414 Builder.buildAnd(DstReg, Src0, Add);
3415 MI.eraseFromParent();
3416}
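// E.g. (illustrative): G_UREM %x, 16, with 16 known to be a power of two,
// becomes G_AND %x, 15.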
3417
3419 unsigned &SelectOpNo) {
3420 Register LHS = MI.getOperand(1).getReg();
3421 Register RHS = MI.getOperand(2).getReg();
3422
3423 Register OtherOperandReg = RHS;
3424  SelectOpNo = 1;
3425  MachineInstr *Select = MRI.getVRegDef(LHS);
3426
3427 // Don't do this unless the old select is going away. We want to eliminate the
3428 // binary operator, not replace a binop with a select.
3429  if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3430      !MRI.hasOneNonDBGUse(LHS)) {
3431    OtherOperandReg = LHS;
3432    SelectOpNo = 2;
3433    Select = MRI.getVRegDef(RHS);
3434    if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3435        !MRI.hasOneNonDBGUse(RHS))
3436      return false;
3437 }
3438
3439 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3440 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3441
3442 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3443 /*AllowFP*/ true,
3444 /*AllowOpaqueConstants*/ false))
3445 return false;
3446 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3447 /*AllowFP*/ true,
3448 /*AllowOpaqueConstants*/ false))
3449 return false;
3450
3451 unsigned BinOpcode = MI.getOpcode();
3452
3453 // We know that one of the operands is a select of constants. Now verify that
3454 // the other binary operator operand is either a constant, or we can handle a
3455 // variable.
3456 bool CanFoldNonConst =
3457 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3458 (isNullOrNullSplat(*SelectLHS, MRI) ||
3459 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3460 (isNullOrNullSplat(*SelectRHS, MRI) ||
3461 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3462 if (CanFoldNonConst)
3463 return true;
3464
3465 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3466 /*AllowFP*/ true,
3467 /*AllowOpaqueConstants*/ false);
3468}
3469
3470/// \p SelectOperand is the operand in binary operator \p MI that is the select
3471/// to fold.
3473 const unsigned &SelectOperand) {
3474 Register Dst = MI.getOperand(0).getReg();
3475 Register LHS = MI.getOperand(1).getReg();
3476 Register RHS = MI.getOperand(2).getReg();
3477 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3478
3479 Register SelectCond = Select->getOperand(1).getReg();
3480 Register SelectTrue = Select->getOperand(2).getReg();
3481 Register SelectFalse = Select->getOperand(3).getReg();
3482
3483 LLT Ty = MRI.getType(Dst);
3484 unsigned BinOpcode = MI.getOpcode();
3485
3486 Register FoldTrue, FoldFalse;
3487
3488 // We have a select-of-constants followed by a binary operator with a
3489 // constant. Eliminate the binop by pulling the constant math into the select.
3490 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
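// e.g. (add (select Cond, 2, 4), 3) -> (select Cond, (add 2, 3), (add 4, 3)),
// where the new binops on constants are expected to constant-fold away.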
3491 if (SelectOperand == 1) {
3492 // TODO: SelectionDAG verifies this actually constant folds before
3493 // committing to the combine.
3494
3495 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3496 FoldFalse =
3497 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3498 } else {
3499 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3500 FoldFalse =
3501 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3502 }
3503
3504 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3505 MI.eraseFromParent();
3506}
3507
3508std::optional<SmallVector<Register, 8>>
3509CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3510 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3511 // We want to detect if Root is part of a tree which represents a bunch
3512 // of loads being merged into a larger load. We'll try to recognize patterns
3513 // like, for example:
3514 //
3515 // Reg Reg
3516 // \ /
3517 // OR_1 Reg
3518 // \ /
3519 // OR_2
3520 // \ Reg
3521 // .. /
3522 // Root
3523 //
3524 // Reg Reg Reg Reg
3525 // \ / \ /
3526 // OR_1 OR_2
3527 // \ /
3528 // \ /
3529 // ...
3530 // Root
3531 //
3532 // Each "Reg" may have been produced by a load + some arithmetic. This
3533 // function will save each of them.
3534 SmallVector<Register, 8> RegsToVisit;
3536
3537 // In the "worst" case, we're dealing with a load for each byte. So, there
3538 // are at most #bytes - 1 ORs.
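// e.g. combining four s8 loads into a single s32 value involves at most
// three G_ORs.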
3539 const unsigned MaxIter =
3540 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3541 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3542 if (Ors.empty())
3543 break;
3544 const MachineInstr *Curr = Ors.pop_back_val();
3545 Register OrLHS = Curr->getOperand(1).getReg();
3546 Register OrRHS = Curr->getOperand(2).getReg();
3547
3548 // In the combine, we want to eliminate the entire tree.
3549 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3550 return std::nullopt;
3551
3552 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3553 // something that may be a load + arithmetic.
3554 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3555 Ors.push_back(Or);
3556 else
3557 RegsToVisit.push_back(OrLHS);
3558 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3559 Ors.push_back(Or);
3560 else
3561 RegsToVisit.push_back(OrRHS);
3562 }
3563
3564 // We're going to try to merge each register into a wider power-of-2 type,
3565 // so we ought to have an even number of registers.
3566 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3567 return std::nullopt;
3568 return RegsToVisit;
3569}
3570
3571/// Helper function for findLoadOffsetsForLoadOrCombine.
3572///
3573/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3574/// and then moving that value into a specific byte offset.
3575///
3576/// e.g. x[i] << 24
3577///
3578/// \returns The load instruction and the byte offset it is moved into.
3579static std::optional<std::pair<GZExtLoad *, int64_t>>
3580matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3581 const MachineRegisterInfo &MRI) {
3582 assert(MRI.hasOneNonDBGUse(Reg) &&
3583 "Expected Reg to only have one non-debug use?");
3584 Register MaybeLoad;
3585 int64_t Shift;
3586 if (!mi_match(Reg, MRI,
3587 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3588 Shift = 0;
3589 MaybeLoad = Reg;
3590 }
3591
3592 if (Shift % MemSizeInBits != 0)
3593 return std::nullopt;
3594
3595 // TODO: Handle other types of loads.
3596 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3597 if (!Load)
3598 return std::nullopt;
3599
3600 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3601 return std::nullopt;
3602
3603 return std::make_pair(Load, Shift / MemSizeInBits);
3604}
3605
3606std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3607CombinerHelper::findLoadOffsetsForLoadOrCombine(
3609 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3610
3611 // Each load found for the pattern. There should be one for each RegsToVisit.
3613
3614 // The lowest index used in any load. (The lowest "i" for each x[i].)
3615 int64_t LowestIdx = INT64_MAX;
3616
3617 // The load which uses the lowest index.
3618 GZExtLoad *LowestIdxLoad = nullptr;
3619
3620 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3621 SmallSet<int64_t, 8> SeenIdx;
3622
3623 // Ensure each load is in the same MBB.
3624 // TODO: Support multiple MachineBasicBlocks.
3625 MachineBasicBlock *MBB = nullptr;
3626 const MachineMemOperand *MMO = nullptr;
3627
3628 // Earliest instruction-order load in the pattern.
3629 GZExtLoad *EarliestLoad = nullptr;
3630
3631 // Latest instruction-order load in the pattern.
3632 GZExtLoad *LatestLoad = nullptr;
3633
3634 // Base pointer which every load should share.
3636
3637 // We want to find a load for each register. Each load should have some
3638 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3639 // track of the load which uses the lowest index. Later, we will check if we
3640 // can use its pointer in the final, combined load.
3641 for (auto Reg : RegsToVisit) {
3642 // Find the load, and find the position that it will end up in (e.g. a
3643 // shifted value).
3644 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3645 if (!LoadAndPos)
3646 return std::nullopt;
3647 GZExtLoad *Load;
3648 int64_t DstPos;
3649 std::tie(Load, DstPos) = *LoadAndPos;
3650
3651 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3652 // it is difficult to check for stores/calls/etc between loads.
3653 MachineBasicBlock *LoadMBB = Load->getParent();
3654 if (!MBB)
3655 MBB = LoadMBB;
3656 if (LoadMBB != MBB)
3657 return std::nullopt;
3658
3659 // Make sure that the MachineMemOperands of every seen load are compatible.
3660 auto &LoadMMO = Load->getMMO();
3661 if (!MMO)
3662 MMO = &LoadMMO;
3663 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3664 return std::nullopt;
3665
3666 // Find out what the base pointer and index for the load is.
3667 Register LoadPtr;
3668 int64_t Idx;
3669 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3670 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3671 LoadPtr = Load->getOperand(1).getReg();
3672 Idx = 0;
3673 }
3674
3675 // Don't combine things like a[i], a[i] -> a bigger load.
3676 if (!SeenIdx.insert(Idx).second)
3677 return std::nullopt;
3678
3679 // Every load must share the same base pointer; don't combine things like:
3680 //
3681 // a[i], b[i + 1] -> a bigger load.
3682 if (!BasePtr.isValid())
3683 BasePtr = LoadPtr;
3684 if (BasePtr != LoadPtr)
3685 return std::nullopt;
3686
3687 if (Idx < LowestIdx) {
3688 LowestIdx = Idx;
3689 LowestIdxLoad = Load;
3690 }
3691
3692 // Keep track of the byte offset that this load ends up at. If we have seen
3693 // the byte offset, then stop here. We do not want to combine:
3694 //
3695 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3696 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3697 return std::nullopt;
3698 Loads.insert(Load);
3699
3700 // Keep track of the position of the earliest/latest loads in the pattern.
3701 // We will check that there are no load fold barriers between them later
3702 // on.
3703 //
3704 // FIXME: Is there a better way to check for load fold barriers?
3705 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3706 EarliestLoad = Load;
3707 if (!LatestLoad || dominates(*LatestLoad, *Load))
3708 LatestLoad = Load;
3709 }
3710
3711 // We found a load for each register. Let's check if each load satisfies the
3712 // pattern.
3713 assert(Loads.size() == RegsToVisit.size() &&
3714 "Expected to find a load for each register?");
3715 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3716 LatestLoad && "Expected at least two loads?");
3717
3718 // Check if there are any stores, calls, etc. between any of the loads. If
3719 // there are, then we can't safely perform the combine.
3720 //
3721 // MaxIter is chosen based on the (worst-case) number of iterations it
3722 // typically takes to succeed in the LLVM test suite, plus some padding.
3723 //
3724 // FIXME: Is there a better way to check for load fold barriers?
3725 const unsigned MaxIter = 20;
3726 unsigned Iter = 0;
3727 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3728 LatestLoad->getIterator())) {
3729 if (Loads.count(&MI))
3730 continue;
3731 if (MI.isLoadFoldBarrier())
3732 return std::nullopt;
3733 if (Iter++ == MaxIter)
3734 return std::nullopt;
3735 }
3736
3737 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3738}
3739
3741 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3742 assert(MI.getOpcode() == TargetOpcode::G_OR);
3743 MachineFunction &MF = *MI.getMF();
3744 // Assuming a little-endian target, transform:
3745 // s8 *a = ...
3746 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3747 // =>
3748 // s32 val = *((i32)a)
3749 //
3750 // s8 *a = ...
3751 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3752 // =>
3753 // s32 val = BSWAP(*((s32)a))
3754 Register Dst = MI.getOperand(0).getReg();
3755 LLT Ty = MRI.getType(Dst);
3756 if (Ty.isVector())
3757 return false;
3758
3759 // We need to combine at least two loads into this type. Since the smallest
3760 // possible load is into a byte, we need at least a 16-bit wide type.
3761 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3762 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3763 return false;
3764
3765 // Match a collection of non-OR instructions in the pattern.
3766 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3767 if (!RegsToVisit)
3768 return false;
3769
3770 // We have a collection of non-OR instructions. Figure out how wide each of
3771 // the small loads should be based on the number of potential loads we
3772 // found.
3773 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3774 if (NarrowMemSizeInBits % 8 != 0)
3775 return false;
3776
3777 // Check if each register feeding into each OR is a load from the same
3778 // base pointer + some arithmetic.
3779 //
3780 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3781 //
3782 // Also verify that each of these ends up putting a[i] into the same memory
3783 // offset as a load into a wide type would.
3785 GZExtLoad *LowestIdxLoad, *LatestLoad;
3786 int64_t LowestIdx;
3787 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3788 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3789 if (!MaybeLoadInfo)
3790 return false;
3791 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3792
3793 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3794 // we found before, check if this corresponds to a big or little endian byte
3795 // pattern. If it does, then we can represent it using a load + possibly a
3796 // BSWAP.
3797 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3798 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3799 if (!IsBigEndian)
3800 return false;
3801 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3802 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3803 return false;
3804
3805 // Make sure that the load from the lowest index produces offset 0 in the
3806 // final value.
3807 //
3808 // This ensures that we won't combine something like this:
3809 //
3810 // load x[i] -> byte 2
3811 // load x[i+1] -> byte 0 ---> wide_load x[i]
3812 // load x[i+2] -> byte 1
3813 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3814 const unsigned ZeroByteOffset =
3815 *IsBigEndian
3816 ? bigEndianByteAt(NumLoadsInTy, 0)
3817 : littleEndianByteAt(NumLoadsInTy, 0);
3818 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3819 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3820 ZeroOffsetIdx->second != LowestIdx)
3821 return false;
3822
3823 // We will reuse the pointer from the load which ends up at byte offset 0. It
3824 // may not use index 0.
3825 Register Ptr = LowestIdxLoad->getPointerReg();
3826 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3827 LegalityQuery::MemDesc MMDesc(MMO);
3828 MMDesc.MemoryTy = Ty;
3830 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3831 return false;
3832 auto PtrInfo = MMO.getPointerInfo();
3833 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3834
3835 // Load must be allowed and fast on the target.
3837 auto &DL = MF.getDataLayout();
3838 unsigned Fast = 0;
3839 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3840 !Fast)
3841 return false;
3842
3843 MatchInfo = [=](MachineIRBuilder &MIB) {
3844 MIB.setInstrAndDebugLoc(*LatestLoad);
3845 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3846 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3847 if (NeedsBSwap)
3848 MIB.buildBSwap(Dst, LoadDst);
3849 };
3850 return true;
3851}
3852
3854 MachineInstr *&ExtMI) {
3855 auto &PHI = cast<GPhi>(MI);
3856 Register DstReg = PHI.getReg(0);
3857
3858 // TODO: Extending a vector may be expensive; don't do this until heuristics
3859 // are better.
3860 if (MRI.getType(DstReg).isVector())
3861 return false;
3862
3863 // Try to match a phi whose only use is an extend.
3864 if (!MRI.hasOneNonDBGUse(DstReg))
3865 return false;
3866 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3867 switch (ExtMI->getOpcode()) {
3868 case TargetOpcode::G_ANYEXT:
3869 return true; // G_ANYEXT is usually free.
3870 case TargetOpcode::G_ZEXT:
3871 case TargetOpcode::G_SEXT:
3872 break;
3873 default:
3874 return false;
3875 }
3876
3877 // If the target is likely to fold this extend away, don't propagate.
3879 return false;
3880
3881 // We don't want to propagate the extends unless there's a good chance that
3882 // they'll be optimized in some way.
3883 // Collect the unique incoming values.
3885 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3886 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
3887 switch (DefMI->getOpcode()) {
3888 case TargetOpcode::G_LOAD:
3889 case TargetOpcode::G_TRUNC:
3890 case TargetOpcode::G_SEXT:
3891 case TargetOpcode::G_ZEXT:
3892 case TargetOpcode::G_ANYEXT:
3893 case TargetOpcode::G_CONSTANT:
3894 InSrcs.insert(DefMI);
3895 // Don't try to propagate if there are too many places to create new
3896 // extends; chances are it'll increase code size.
3897 if (InSrcs.size() > 2)
3898 return false;
3899 break;
3900 default:
3901 return false;
3902 }
3903 }
3904 return true;
3905}
3906
3908 MachineInstr *&ExtMI) {
3909 auto &PHI = cast<GPhi>(MI);
3910 Register DstReg = ExtMI->getOperand(0).getReg();
3911 LLT ExtTy = MRI.getType(DstReg);
3912
3913 // Propagate the extension into each incoming register's block.
3914 // Use a SetVector here because PHIs can have duplicate edges, and we want
3915 // deterministic iteration order.
3918 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3919 auto SrcReg = PHI.getIncomingValue(I);
3920 auto *SrcMI = MRI.getVRegDef(SrcReg);
3921 if (!SrcMIs.insert(SrcMI))
3922 continue;
3923
3924 // Build an extend after each src inst.
3925 auto *MBB = SrcMI->getParent();
3926 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
3927 if (InsertPt != MBB->end() && InsertPt->isPHI())
3928 InsertPt = MBB->getFirstNonPHI();
3929
3930 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
3931 Builder.setDebugLoc(MI.getDebugLoc());
3932 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
3933 OldToNewSrcMap[SrcMI] = NewExt;
3934 }
3935
3936 // Create a new phi with the extended inputs.
3938 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
3939 NewPhi.addDef(DstReg);
3940 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
3941 if (!MO.isReg()) {
3942 NewPhi.addMBB(MO.getMBB());
3943 continue;
3944 }
3945 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
3946 NewPhi.addUse(NewSrc->getOperand(0).getReg());
3947 }
3948 Builder.insertInstr(NewPhi);
3949 ExtMI->eraseFromParent();
3950}
3951
3953 Register &Reg) {
3954 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
3955 // If we have a constant index, look for a G_BUILD_VECTOR source
3956 // and find the source register that the index maps to.
3957 Register SrcVec = MI.getOperand(1).getReg();
3958 LLT SrcTy = MRI.getType(SrcVec);
3959
3960 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
3961 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
3962 return false;
3963
3964 unsigned VecIdx = Cst->Value.getZExtValue();
3965
3966 // Check if we have a build_vector or build_vector_trunc with an optional
3967 // trunc in front.
3968 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
3969 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
3970 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
3971 }
3972
3973 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
3974 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
3975 return false;
3976
3977 EVT Ty(getMVTForLLT(SrcTy));
3978 if (!MRI.hasOneNonDBGUse(SrcVec) &&
3979 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
3980 return false;
3981
3982 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
3983 return true;
3984}
3985
3987 Register &Reg) {
3988 // Check the type of the register, since it may have come from a
3989 // G_BUILD_VECTOR_TRUNC.
3990 LLT ScalarTy = MRI.getType(Reg);
3991 Register DstReg = MI.getOperand(0).getReg();
3992 LLT DstTy = MRI.getType(DstReg);
3993
3994 if (ScalarTy != DstTy) {
3995 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
3996 Builder.buildTrunc(DstReg, Reg);
3997 MI.eraseFromParent();
3998 return;
3999 }
4001}
4002
4005 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4006 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4007 // This combine tries to find build_vectors whose every source element is
4008 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4009 // masked load scalarization are run late in the pipeline. There's already
4010 // a combine for a similar pattern starting from the extract, but that
4011 // doesn't attempt to do it if there are multiple uses of the build_vector,
4012 // which in this case is true. Starting the combine from the build_vector
4013 // feels more natural than trying to find sibling nodes of extracts.
4014 // E.g.
4015 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4016 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4017 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4018 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4019 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4020 // ==>
4021 // replace ext{1,2,3,4} with %s{1,2,3,4}
4022
4023 Register DstReg = MI.getOperand(0).getReg();
4024 LLT DstTy = MRI.getType(DstReg);
4025 unsigned NumElts = DstTy.getNumElements();
4026
4027 SmallBitVector ExtractedElts(NumElts);
4028 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4029 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4030 return false;
4031 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4032 if (!Cst)
4033 return false;
4034 unsigned Idx = Cst->getZExtValue();
4035 if (Idx >= NumElts)
4036 return false; // Out of range.
4037 ExtractedElts.set(Idx);
4038 SrcDstPairs.emplace_back(
4039 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4040 }
4041 // Match if every element was extracted.
4042 return ExtractedElts.all();
4043}
4044
4047 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4048 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4049 for (auto &Pair : SrcDstPairs) {
4050 auto *ExtMI = Pair.second;
4051 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4052 ExtMI->eraseFromParent();
4053 }
4054 MI.eraseFromParent();
4055}
4056
4058 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4059 applyBuildFnNoErase(MI, MatchInfo);
4060 MI.eraseFromParent();
4061}
4062
4064 BuildFnTy &MatchInfo) {
4067 MatchInfo(Builder);
4068 Root->eraseFromParent();
4069}
4070
4072 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4073 MatchInfo(Builder);
4074}
4075
4077 BuildFnTy &MatchInfo) {
4078 assert(MI.getOpcode() == TargetOpcode::G_OR);
4079
4080 Register Dst = MI.getOperand(0).getReg();
4081 LLT Ty = MRI.getType(Dst);
4082 unsigned BitWidth = Ty.getScalarSizeInBits();
4083
4084 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4085 unsigned FshOpc = 0;
4086
4087 // Match (or (shl ...), (lshr ...)).
4088 if (!mi_match(Dst, MRI,
4089 // m_GOr() handles the commuted version as well.
4090 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4091 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4092 return false;
4093
4094 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4095 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
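// e.g. for 32-bit values, (or (shl x, 8), (lshr y, 24)) matches with C0 = 8
// and C1 = 24 and becomes (fshr x, y, 24).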
4096 int64_t CstShlAmt, CstLShrAmt;
4097 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4098 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4099 CstShlAmt + CstLShrAmt == BitWidth) {
4100 FshOpc = TargetOpcode::G_FSHR;
4101 Amt = LShrAmt;
4102
4103 } else if (mi_match(LShrAmt, MRI,
4105 ShlAmt == Amt) {
4106 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4107 FshOpc = TargetOpcode::G_FSHL;
4108
4109 } else if (mi_match(ShlAmt, MRI,
4111 LShrAmt == Amt) {
4112 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4113 FshOpc = TargetOpcode::G_FSHR;
4114
4115 } else {
4116 return false;
4117 }
4118
4119 LLT AmtTy = MRI.getType(Amt);
4120 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4121 return false;
4122
4123 MatchInfo = [=](MachineIRBuilder &B) {
4124 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4125 };
4126 return true;
4127}
4128
4129/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4131 unsigned Opc = MI.getOpcode();
4132 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4133 Register X = MI.getOperand(1).getReg();
4134 Register Y = MI.getOperand(2).getReg();
4135 if (X != Y)
4136 return false;
4137 unsigned RotateOpc =
4138 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4139 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4140}
4141
4143 unsigned Opc = MI.getOpcode();
4144 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4145 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4147 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4148 : TargetOpcode::G_ROTR));
4149 MI.removeOperand(2);
4151}
4152
4153// Fold (rot x, c) -> (rot x, c % BitSize)
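// e.g. a 32-bit (rotl x, 37) is equivalent to (rotl x, 5); the apply below
// rewrites the amount as (urem amt, 32).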
4155 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4156 MI.getOpcode() == TargetOpcode::G_ROTR);
4157 unsigned Bitsize =
4158 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4159 Register AmtReg = MI.getOperand(2).getReg();
4160 bool OutOfRange = false;
4161 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4162 if (auto *CI = dyn_cast<ConstantInt>(C))
4163 OutOfRange |= CI->getValue().uge(Bitsize);
4164 return true;
4165 };
4166 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4167}
4168
4170 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4171 MI.getOpcode() == TargetOpcode::G_ROTR);
4172 unsigned Bitsize =
4173 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4174 Register Amt = MI.getOperand(2).getReg();
4175 LLT AmtTy = MRI.getType(Amt);
4176 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4177 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4179 MI.getOperand(2).setReg(Amt);
4181}
4182
4184 int64_t &MatchInfo) {
4185 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4186 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4187 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4188 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4189 std::optional<bool> KnownVal;
4190 switch (Pred) {
4191 default:
4192 llvm_unreachable("Unexpected G_ICMP predicate?");
4193 case CmpInst::ICMP_EQ:
4194 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4195 break;
4196 case CmpInst::ICMP_NE:
4197 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4198 break;
4199 case CmpInst::ICMP_SGE:
4200 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4201 break;
4202 case CmpInst::ICMP_SGT:
4203 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4204 break;
4205 case CmpInst::ICMP_SLE:
4206 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4207 break;
4208 case CmpInst::ICMP_SLT:
4209 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4210 break;
4211 case CmpInst::ICMP_UGE:
4212 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4213 break;
4214 case CmpInst::ICMP_UGT:
4215 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4216 break;
4217 case CmpInst::ICMP_ULE:
4218 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4219 break;
4220 case CmpInst::ICMP_ULT:
4221 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4222 break;
4223 }
4224 if (!KnownVal)
4225 return false;
4226 MatchInfo =
4227 *KnownVal
4229 /*IsVector = */
4230 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4231 /* IsFP = */ false)
4232 : 0;
4233 return true;
4234}
4235
4237 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4238 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4239 // Given:
4240 //
4241 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4242 // %cmp = G_ICMP ne %x, 0
4243 //
4244 // Or:
4245 //
4246 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4247 // %cmp = G_ICMP eq %x, 1
4248 //
4249 // We can replace %cmp with %x assuming true is 1 on the target.
4250 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4251 if (!CmpInst::isEquality(Pred))
4252 return false;
4253 Register Dst = MI.getOperand(0).getReg();
4254 LLT DstTy = MRI.getType(Dst);
4256 /* IsFP = */ false) != 1)
4257 return false;
4258 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4259 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4260 return false;
4261 Register LHS = MI.getOperand(2).getReg();
4262 auto KnownLHS = KB->getKnownBits(LHS);
4263 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4264 return false;
4265 // Make sure replacing Dst with the LHS is a legal operation.
4266 LLT LHSTy = MRI.getType(LHS);
4267 unsigned LHSSize = LHSTy.getSizeInBits();
4268 unsigned DstSize = DstTy.getSizeInBits();
4269 unsigned Op = TargetOpcode::COPY;
4270 if (DstSize != LHSSize)
4271 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4272 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4273 return false;
4274 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4275 return true;
4276}
4277
4278// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
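// e.g. (and (or x, 0xF0), 0x0F) -> (and x, 0x0F), since 0xF0 & 0x0F == 0 and
// the bits set by the G_OR are cleared by the mask anyway.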
4280 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4281 assert(MI.getOpcode() == TargetOpcode::G_AND);
4282
4283 // Ignore vector types to simplify matching the two constants.
4284 // TODO: do this for vectors and scalars via a demanded bits analysis.
4285 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4286 if (Ty.isVector())
4287 return false;
4288
4289 Register Src;
4290 Register AndMaskReg;
4291 int64_t AndMaskBits;
4292 int64_t OrMaskBits;
4293 if (!mi_match(MI, MRI,
4294 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4295 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4296 return false;
4297
4298 // Check if OrMask could turn on any bits in Src.
4299 if (AndMaskBits & OrMaskBits)
4300 return false;
4301
4302 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4304 // Canonicalize the result to have the constant on the RHS.
4305 if (MI.getOperand(1).getReg() == AndMaskReg)
4306 MI.getOperand(2).setReg(AndMaskReg);
4307 MI.getOperand(1).setReg(Src);
4309 };
4310 return true;
4311}
4312
4313/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
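/// e.g. (sext_inreg (lshr x, 8), 8) sign-extends bits [15:8] of x, which is
/// exactly (sbfx x, 8, 8).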
4315 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4316 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4317 Register Dst = MI.getOperand(0).getReg();
4318 Register Src = MI.getOperand(1).getReg();
4319 LLT Ty = MRI.getType(Src);
4321 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4322 return false;
4323 int64_t Width = MI.getOperand(2).getImm();
4324 Register ShiftSrc;
4325 int64_t ShiftImm;
4326 if (!mi_match(
4327 Src, MRI,
4328 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4329 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4330 return false;
4331 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4332 return false;
4333
4334 MatchInfo = [=](MachineIRBuilder &B) {
4335 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4336 auto Cst2 = B.buildConstant(ExtractTy, Width);
4337 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4338 };
4339 return true;
4340}
4341
4342/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
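/// e.g. (and (lshr x, 3), 0x1F) keeps five bits starting at bit 3 of x, i.e.
/// (ubfx x, 3, 5).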
4344 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4345 assert(MI.getOpcode() == TargetOpcode::G_AND);
4346 Register Dst = MI.getOperand(0).getReg();
4347 LLT Ty = MRI.getType(Dst);
4349 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4350 return false;
4351
4352 int64_t AndImm, LSBImm;
4353 Register ShiftSrc;
4354 const unsigned Size = Ty.getScalarSizeInBits();
4355 if (!mi_match(MI.getOperand(0).getReg(), MRI,
4356 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4357 m_ICst(AndImm))))
4358 return false;
4359
4360 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4361 auto MaybeMask = static_cast<uint64_t>(AndImm);
4362 if (MaybeMask & (MaybeMask + 1))
4363 return false;
4364
4365 // LSB must fit within the register.
4366 if (static_cast<uint64_t>(LSBImm) >= Size)
4367 return false;
4368
4369 uint64_t Width = APInt(Size, AndImm).countr_one();
4370 MatchInfo = [=](MachineIRBuilder &B) {
4371 auto WidthCst = B.buildConstant(ExtractTy, Width);
4372 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4373 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4374 };
4375 return true;
4376}
4377
4379 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4380 const unsigned Opcode = MI.getOpcode();
4381 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4382
4383 const Register Dst = MI.getOperand(0).getReg();
4384
4385 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4386 ? TargetOpcode::G_SBFX
4387 : TargetOpcode::G_UBFX;
4388
4389 // Check if the type we would use for the extract is legal
4390 LLT Ty = MRI.getType(Dst);
4392 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4393 return false;
4394
4395 Register ShlSrc;
4396 int64_t ShrAmt;
4397 int64_t ShlAmt;
4398 const unsigned Size = Ty.getScalarSizeInBits();
4399
4400 // Try to match shr (shl x, c1), c2
4401 if (!mi_match(Dst, MRI,
4402 m_BinOp(Opcode,
4403 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4404 m_ICst(ShrAmt))))
4405 return false;
4406
4407 // Make sure that the shift sizes can fit a bitfield extract
4408 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4409 return false;
4410
4411 // Skip this combine if the G_SEXT_INREG combine could handle it
4412 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4413 return false;
4414
4415 // Calculate start position and width of the extract
4416 const int64_t Pos = ShrAmt - ShlAmt;
4417 const int64_t Width = Size - ShrAmt;
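// e.g. on s32, (ashr (shl x, 24), 28) sign-extends bits [7:4] of x, giving
// (sbfx x, 4, 4): Pos = 28 - 24 = 4, Width = 32 - 28 = 4.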
4418
4419 MatchInfo = [=](MachineIRBuilder &B) {
4420 auto WidthCst = B.buildConstant(ExtractTy, Width);
4421 auto PosCst = B.buildConstant(ExtractTy, Pos);
4422 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4423 };
4424 return true;
4425}
4426
4428 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4429 const unsigned Opcode = MI.getOpcode();
4430 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4431
4432 const Register Dst = MI.getOperand(0).getReg();
4433 LLT Ty = MRI.getType(Dst);
4435 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4436 return false;
4437
4438 // Try to match shr (and x, c1), c2
4439 Register AndSrc;
4440 int64_t ShrAmt;
4441 int64_t SMask;
4442 if (!mi_match(Dst, MRI,
4443 m_BinOp(Opcode,
4444 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4445 m_ICst(ShrAmt))))
4446 return false;
4447
4448 const unsigned Size = Ty.getScalarSizeInBits();
4449 if (ShrAmt < 0 || ShrAmt >= Size)
4450 return false;
4451
4452 // If the shift subsumes the mask, emit the 0 directly.
4453 if (0 == (SMask >> ShrAmt)) {
4454 MatchInfo = [=](MachineIRBuilder &B) {
4455 B.buildConstant(Dst, 0);
4456 };
4457 return true;
4458 }
4459
4460 // Check that ubfx can do the extraction, with no holes in the mask.
4461 uint64_t UMask = SMask;
4462 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4463 UMask &= maskTrailingOnes<uint64_t>(Size);
4464 if (!isMask_64(UMask))
4465 return false;
4466
4467 // Calculate start position and width of the extract.
4468 const int64_t Pos = ShrAmt;
4469 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
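// e.g. (lshr (and x, 0xFF0), 4) extracts bits [11:4] of x: UMask becomes
// 0xFFF, so Pos = 4 and Width = 12 - 4 = 8, i.e. (ubfx x, 4, 8).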
4470
4471 // It's preferable to keep the shift, rather than form G_SBFX.
4472 // TODO: remove the G_AND via demanded bits analysis.
4473 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4474 return false;
4475
4476 MatchInfo = [=](MachineIRBuilder &B) {
4477 auto WidthCst = B.buildConstant(ExtractTy, Width);
4478 auto PosCst = B.buildConstant(ExtractTy, Pos);
4479 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4480 };
4481 return true;
4482}
4483
4484bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4485 MachineInstr &MI) {
4486 auto &PtrAdd = cast<GPtrAdd>(MI);
4487
4488 Register Src1Reg = PtrAdd.getBaseReg();
4489 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4490 if (!Src1Def)
4491 return false;
4492
4493 Register Src2Reg = PtrAdd.getOffsetReg();
4494
4495 if (MRI.hasOneNonDBGUse(Src1Reg))
4496 return false;
4497
4498 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4499 if (!C1)
4500 return false;
4501 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4502 if (!C2)
4503 return false;
4504
4505 const APInt &C1APIntVal = *C1;
4506 const APInt &C2APIntVal = *C2;
4507 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4508
4509 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4510 // This combine may end up running before ptrtoint/inttoptr combines
4511 // manage to eliminate redundant conversions, so try to look through them.
4512 MachineInstr *ConvUseMI = &UseMI;
4513 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4514 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4515 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4516 Register DefReg = ConvUseMI->getOperand(0).getReg();
4517 if (!MRI.hasOneNonDBGUse(DefReg))
4518 break;
4519 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4520 ConvUseOpc = ConvUseMI->getOpcode();
4521 }
4522 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4523 if (!LdStMI)
4524 continue;
4525 // Is x[offset2] already not a legal addressing mode? If so then
4526 // reassociating the constants breaks nothing (we test offset2 because
4527 // that's the one we hope to fold into the load or store).
4529 AM.HasBaseReg = true;
4530 AM.BaseOffs = C2APIntVal.getSExtValue();
4531 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4532 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4533 PtrAdd.getMF()->getFunction().getContext());
4534 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4535 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4536 AccessTy, AS))
4537 continue;
4538
4539 // Would x[offset1+offset2] still be a legal addressing mode?
4540 AM.BaseOffs = CombinedValue;
4541 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4542 AccessTy, AS))
4543 return true;
4544 }
4545
4546 return false;
4547}
4548
4550 MachineInstr *RHS,
4551 BuildFnTy &MatchInfo) {
4552 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4553 Register Src1Reg = MI.getOperand(1).getReg();
4554 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4555 return false;
4556 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4557 if (!C2)
4558 return false;
4559
4560 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4561 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4562
4563 auto NewBase =
4564 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4566 MI.getOperand(1).setReg(NewBase.getReg(0));
4567 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4569 };
4570 return !reassociationCanBreakAddressingModePattern(MI);
4571}
4572
4574 MachineInstr *LHS,
4575 MachineInstr *RHS,
4576 BuildFnTy &MatchInfo) {
4577 // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
4578 // if and only if (G_PTR_ADD X, C) has one use.
4579 Register LHSBase;
4580 std::optional<ValueAndVReg> LHSCstOff;
4581 if (!mi_match(MI.getBaseReg(), MRI,
4582 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4583 return false;
4584
4585 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4586 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4587 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4588 // before its def. Sink the instruction before the outer PTR_ADD to ensure
4589 // this doesn't happen.
4590 LHSPtrAdd->moveBefore(&MI);
4591 Register RHSReg = MI.getOffsetReg();
4592 // Rebuild the constant; reusing the matched vreg can cause a type mismatch if it came from an extend/trunc.
4593 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4595 MI.getOperand(2).setReg(NewCst.getReg(0));
4597 Observer.changingInstr(*LHSPtrAdd);
4598 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4599 Observer.changedInstr(*LHSPtrAdd);
4600 };
4601 return !reassociationCanBreakAddressingModePattern(MI);
4602}
4603
4605 MachineInstr *LHS,
4606 MachineInstr *RHS,
4607 BuildFnTy &MatchInfo) {
4608 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4609 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4610 if (!LHSPtrAdd)
4611 return false;
4612
4613 Register Src2Reg = MI.getOperand(2).getReg();
4614 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4615 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4616 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4617 if (!C1)
4618 return false;
4619 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4620 if (!C2)
4621 return false;
4622
4623 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4624 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4626 MI.getOperand(1).setReg(LHSSrc1);
4627 MI.getOperand(2).setReg(NewCst.getReg(0));
4629 };
4630 return !reassociationCanBreakAddressingModePattern(MI);
4631}
4632
4634 BuildFnTy &MatchInfo) {
4635 auto &PtrAdd = cast<GPtrAdd>(MI);
4636 // We're trying to match a few pointer computation patterns here for
4637 // re-association opportunities.
4638 // 1) Isolating a constant operand to be on the RHS, e.g.:
4639 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4640 //
4641 // 2) Folding two constants in each sub-tree as long as such folding
4642 // doesn't break a legal addressing mode.
4643 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4644 //
4645 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4646 // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
4647 // iff (G_PTR_ADD X, C) has one use.
4648 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4649 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4650
4651 // Try to match example 2.
4652 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4653 return true;
4654
4655 // Try to match example 3.
4656 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4657 return true;
4658
4659 // Try to match example 1.
4660 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4661 return true;
4662
4663 return false;
4664}
4666 Register OpLHS, Register OpRHS,
4667 BuildFnTy &MatchInfo) {
4668 LLT OpRHSTy = MRI.getType(OpRHS);
4669 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
4670
4671 if (OpLHSDef->getOpcode() != Opc)
4672 return false;
4673
4674 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
4675 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
4676 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
4677
4678 // If the inner op is (X op C), pull the constant out so it can be folded with
4679 // other constants in the expression tree. Folding is not guaranteed so we
4680 // might have (C1 op C2). In that case do not pull a constant out because it
4681 // won't help and can lead to infinite loops.
4684 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
4685 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
4686 MatchInfo = [=](MachineIRBuilder &B) {
4687 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
4688 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
4689 };
4690 return true;
4691 }
4692 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
4693 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
4694 // iff (op x, c1) has one use
4695 MatchInfo = [=](MachineIRBuilder &B) {
4696 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
4697 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
4698 };
4699 return true;
4700 }
4701 }
4702
4703 return false;
4704}
4705
4707 BuildFnTy &MatchInfo) {
4708 // We don't check if the reassociation will break a legal addressing mode
4709 // here since pointer arithmetic is handled by G_PTR_ADD.
4710 unsigned Opc = MI.getOpcode();
4711 Register DstReg = MI.getOperand(0).getReg();
4712 Register LHSReg = MI.getOperand(1).getReg();
4713 Register RHSReg = MI.getOperand(2).getReg();
4714
4715 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
4716 return true;
4717 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
4718 return true;
4719 return false;
4720}
4721
4723 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4724 Register SrcOp = MI.getOperand(1).getReg();
4725
4726 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
4727 MatchInfo = *MaybeCst;
4728 return true;
4729 }
4730
4731 return false;
4732}
4733
4735 Register Op1 = MI.getOperand(1).getReg();
4736 Register Op2 = MI.getOperand(2).getReg();
4737 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4738 if (!MaybeCst)
4739 return false;
4740 MatchInfo = *MaybeCst;
4741 return true;
4742}
4743
4745 Register Op1 = MI.getOperand(1).getReg();
4746 Register Op2 = MI.getOperand(2).getReg();
4747 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
4748 if (!MaybeCst)
4749 return false;
4750 MatchInfo =
4751 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
4752 return true;
4753}
4754
4756 ConstantFP *&MatchInfo) {
4757 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
4758 MI.getOpcode() == TargetOpcode::G_FMAD);
4759 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
4760
4761 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
4762 if (!Op3Cst)
4763 return false;
4764
4765 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
4766 if (!Op2Cst)
4767 return false;
4768
4769 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
4770 if (!Op1Cst)
4771 return false;
4772
4773 APFloat Op1F = Op1Cst->getValueAPF();
4774 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
4776 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
4777 return true;
4778}
4779
4781 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4782 // Look for a binop feeding into an AND with a mask:
4783 //
4784 // %add = G_ADD %lhs, %rhs
4785 // %and = G_AND %add, 000...11111111
4786 //
4787 // Check if it's possible to perform the binop at a narrower width and zext
4788 // back to the original width like so:
4789 //
4790 // %narrow_lhs = G_TRUNC %lhs
4791 // %narrow_rhs = G_TRUNC %rhs
4792 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4793 // %new_add = G_ZEXT %narrow_add
4794 // %and = G_AND %new_add, 000...11111111
4795 //
4796 // This can allow later combines to eliminate the G_AND if it turns out
4797 // that the mask is irrelevant.
4798 assert(MI.getOpcode() == TargetOpcode::G_AND);
4799 Register Dst = MI.getOperand(0).getReg();
4800 Register AndLHS = MI.getOperand(1).getReg();
4801 Register AndRHS = MI.getOperand(2).getReg();
4802 LLT WideTy = MRI.getType(Dst);
4803
4804 // If the potential binop has more than one use, then it's possible that one
4805 // of those uses will need its full width.
4806 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
4807 return false;
4808
4809 // Check if the LHS feeding the AND is impacted by the high bits that we're
4810 // masking out.
4811 //
4812 // e.g. for 64-bit x, y:
4813 //
4814 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
4815 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
4816 if (!LHSInst)
4817 return false;
4818 unsigned LHSOpc = LHSInst->getOpcode();
4819 switch (LHSOpc) {
4820 default:
4821 return false;
4822 case TargetOpcode::G_ADD:
4823 case TargetOpcode::G_SUB:
4824 case TargetOpcode::G_MUL:
4825 case TargetOpcode::G_AND:
4826 case TargetOpcode::G_OR:
4827 case TargetOpcode::G_XOR:
4828 break;
4829 }
4830
4831 // Find the mask on the RHS.
4832 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
4833 if (!Cst)
4834 return false;
4835 auto Mask = Cst->Value;
4836 if (!Mask.isMask())
4837 return false;
4838
4839 // No point in combining if there's nothing to truncate.
4840 unsigned NarrowWidth = Mask.countr_one();
4841 if (NarrowWidth == WideTy.getSizeInBits())
4842 return false;
4843 LLT NarrowTy = LLT::scalar(NarrowWidth);
4844
4845 // Check if adding the zext + truncates could be harmful.
4846 auto &MF = *MI.getMF();
4847 const auto &TLI = getTargetLowering();
4848 LLVMContext &Ctx = MF.getFunction().getContext();
4849 auto &DL = MF.getDataLayout();
4850 if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
4851 !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
4852 return false;
4853 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
4854 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
4855 return false;
4856 Register BinOpLHS = LHSInst->getOperand(1).getReg();
4857 Register BinOpRHS = LHSInst->getOperand(2).getReg();
4858 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4859 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
4860 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
4861 auto NarrowBinOp =
4862 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
4863 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
4865 MI.getOperand(1).setReg(Ext.getReg(0));
4867 };
4868 return true;
4869}
4870
4872 unsigned Opc = MI.getOpcode();
4873 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
4874
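// (G_*MULO x, 2) -> (G_*ADDO x, x): multiplying a value by two overflows
// exactly when adding the value to itself does.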
4875 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
4876 return false;
4877
4878 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4880 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
4881 : TargetOpcode::G_SADDO;
4882 MI.setDesc(Builder.getTII().get(NewOpc));
4883 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
4885 };
4886 return true;
4887}
4888
4890 // (G_*MULO x, 0) -> 0 + no carry out
4891 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
4892 MI.getOpcode() == TargetOpcode::G_SMULO);
4893 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
4894 return false;
4895 Register Dst = MI.getOperand(0).getReg();
4896 Register Carry = MI.getOperand(1).getReg();
4899 return false;
4900 MatchInfo = [=](MachineIRBuilder &B) {
4901 B.buildConstant(Dst, 0);
4902 B.buildConstant(Carry, 0);
4903 };
4904 return true;
4905}
4906
4908 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
4909 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
4910 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
4911 MI.getOpcode() == TargetOpcode::G_SADDE ||
4912 MI.getOpcode() == TargetOpcode::G_USUBE ||
4913 MI.getOpcode() == TargetOpcode::G_SSUBE);
4914 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
4915 return false;
4916 MatchInfo = [&](MachineIRBuilder &B) {
4917 unsigned NewOpcode;
4918 switch (MI.getOpcode()) {
4919 case TargetOpcode::G_UADDE:
4920 NewOpcode = TargetOpcode::G_UADDO;
4921 break;
4922 case TargetOpcode::G_SADDE:
4923 NewOpcode = TargetOpcode::G_SADDO;
4924 break;
4925 case TargetOpcode::G_USUBE:
4926 NewOpcode = TargetOpcode::G_USUBO;
4927 break;
4928 case TargetOpcode::G_SSUBE:
4929 NewOpcode = TargetOpcode::G_SSUBO;
4930 break;
4931 }
4933 MI.setDesc(B.getTII().get(NewOpcode));
4934 MI.removeOperand(4);
4936 };
4937 return true;
4938}
4939
4941 BuildFnTy &MatchInfo) {
4942 assert(MI.getOpcode() == TargetOpcode::G_SUB);
4943 Register Dst = MI.getOperand(0).getReg();
4944 // (x + y) - z -> x (if y == z)
4945 // (x + y) - z -> y (if x == z)
4946 Register X, Y, Z;
4947 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
4948 Register ReplaceReg;
4949 int64_t CstX, CstY;
4950 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
4952 ReplaceReg = X;
4953 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
4955 ReplaceReg = Y;
4956 if (ReplaceReg) {
4957 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
4958 return true;
4959 }
4960 }
4961
4962 // x - (y + z) -> 0 - y (if x == z)
4963 // x - (y + z) -> 0 - z (if x == y)
4964 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
4965 Register ReplaceReg;
4966 int64_t CstX;
4967 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
4969 ReplaceReg = Y;
4970 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
4972 ReplaceReg = Z;
4973 if (ReplaceReg) {
4974 MatchInfo = [=](MachineIRBuilder &B) {
4975 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
4976 B.buildSub(Dst, Zero, ReplaceReg);
4977 };
4978 return true;
4979 }
4980 }
4981 return false;
4982}
4983
4985 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
4986 auto &UDiv = cast<GenericMachineInstr>(MI);
4987 Register Dst = UDiv.getReg(0);
4988 Register LHS = UDiv.getReg(1);
4989 Register RHS = UDiv.getReg(2);
4990 LLT Ty = MRI.getType(Dst);
4991 LLT ScalarTy = Ty.getScalarType();
4992 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
4994 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
4995 auto &MIB = Builder;
4996
4997 bool UseNPQ = false;
4998 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
4999
5000 auto BuildUDIVPattern = [&](const Constant *C) {
5001 auto *CI = cast<ConstantInt>(C);
5002 const APInt &Divisor = CI->getValue();
5003
5004 bool SelNPQ = false;
5005 APInt Magic(Divisor.getBitWidth(), 0);
5006 unsigned PreShift = 0, PostShift = 0;
5007
5008 // Magic algorithm doesn't work for division by 1. We need to emit a select
5009 // at the end.
5010 // TODO: Use undef values for divisor of 1.
5011 if (!Divisor.isOne()) {
5014
5015 Magic = std::move(magics.Magic);
5016
5017 assert(magics.PreShift < Divisor.getBitWidth() &&
5018 "We shouldn't generate an undefined shift!");
5019 assert(magics.PostShift < Divisor.getBitWidth() &&
5020 "We shouldn't generate an undefined shift!");
5021 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5022 PreShift = magics.PreShift;
5023 PostShift = magics.PostShift;
5024 SelNPQ = magics.IsAdd;
5025 }
5026
5027 PreShifts.push_back(
5028 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5029 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5030 NPQFactors.push_back(
5031 MIB.buildConstant(ScalarTy,
5032 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5033 : APInt::getZero(EltBits))
5034 .getReg(0));
5035 PostShifts.push_back(
5036 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5037 UseNPQ |= SelNPQ;
5038 return true;
5039 };
5040
5041 // Collect the shifts/magic values from each element.
5042 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5043 (void)Matched;
5044 assert(Matched && "Expected unary predicate match to succeed");
5045
5046 Register PreShift, PostShift, MagicFactor, NPQFactor;
5047 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5048 if (RHSDef) {
5049 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5050 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5051 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5052 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5053 } else {
5055 "Non-build_vector operation should have been a scalar");
5056 PreShift = PreShifts[0];
5057 MagicFactor = MagicFactors[0];
5058 PostShift = PostShifts[0];
5059 }
5060
5061 Register Q = LHS;
5062 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5063
5064 // Multiply the numerator (operand 0) by the magic value.
5065 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5066
5067 if (UseNPQ) {
5068 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5069
5070 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5071 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5072 if (Ty.isVector())
5073 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5074 else
5075 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5076
5077 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5078 }
5079
5080 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5081 auto One = MIB.buildConstant(Ty, 1);
5082 auto IsOne = MIB.buildICmp(
5084 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5085 return MIB.buildSelect(Ty, IsOne, LHS, Q);
5086}
5087
5089 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5090 Register Dst = MI.getOperand(0).getReg();
5091 Register RHS = MI.getOperand(2).getReg();
5092 LLT DstTy = MRI.getType(Dst);
5093 auto *RHSDef = MRI.getVRegDef(RHS);
5094 if (!isConstantOrConstantVector(*RHSDef, MRI))
5095 return false;
5096
5097 auto &MF = *MI.getMF();
5098 AttributeList Attr = MF.getFunction().getAttributes();
5099 const auto &TLI = getTargetLowering();
5100 LLVMContext &Ctx = MF.getFunction().getContext();
5101 auto &DL = MF.getDataLayout();
5102 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5103 return false;
5104
5105 // Don't do this for minsize because the instruction sequence is usually
5106 // larger.
5107 if (MF.getFunction().hasMinSize())
5108 return false;
5109
5110 // Don't do this if the types are not going to be legal.
5111 if (LI) {
5112 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5113 return false;
5114 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5115 return false;
5117 {TargetOpcode::G_ICMP,
5118 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5119 DstTy}}))
5120 return false;
5121 }
5122
5123 auto CheckEltValue = [&](const Constant *C) {
5124 if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
5125 return !CI->isZero();
5126 return false;
5127 };
5128 return matchUnaryPredicate(MRI, RHS, CheckEltValue);
5129}
5130
5132 auto *NewMI = buildUDivUsingMul(MI);
5133 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5134}
5135
5136 bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
5137 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5138 Register Dst = MI.getOperand(0).getReg();
5139 Register RHS = MI.getOperand(2).getReg();
5140 LLT DstTy = MRI.getType(Dst);
5141
5142 auto &MF = *MI.getMF();
5143 AttributeList Attr = MF.getFunction().getAttributes();
5144 const auto &TLI = getTargetLowering();
5145 LLVMContext &Ctx = MF.getFunction().getContext();
5146 auto &DL = MF.getDataLayout();
5147 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5148 return false;
5149
5150 // Don't do this for minsize because the instruction sequence is usually
5151 // larger.
5152 if (MF.getFunction().hasMinSize())
5153 return false;
5154
5155 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5156 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5157 return matchUnaryPredicate(
5158 MRI, RHS, [](const Constant *C) { return C && !C->isZeroValue(); });
5159 }
5160
5161 // Don't support the general case for now.
5162 return false;
5163}
5164
5165 void CombinerHelper::applySDivByConst(MachineInstr &MI) {
5166 auto *NewMI = buildSDivUsingMul(MI);
5167 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5168}
5169
5170 MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
5171 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5172 auto &SDiv = cast<GenericMachineInstr>(MI);
5173 Register Dst = SDiv.getReg(0);
5174 Register LHS = SDiv.getReg(1);
5175 Register RHS = SDiv.getReg(2);
5176 LLT Ty = MRI.getType(Dst);
5177 LLT ScalarTy = Ty.getScalarType();
5178 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5179 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5180 auto &MIB = Builder;
5181
5182 bool UseSRA = false;
5183 SmallVector<Register, 16> Shifts, Factors;
5184
5185 auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5186 bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
5187
5188 auto BuildSDIVPattern = [&](const Constant *C) {
5189 // Don't recompute inverses for each splat element.
5190 if (IsSplat && !Factors.empty()) {
5191 Shifts.push_back(Shifts[0]);
5192 Factors.push_back(Factors[0]);
5193 return true;
5194 }
5195
5196 auto *CI = cast<ConstantInt>(C);
5197 APInt Divisor = CI->getValue();
5198 unsigned Shift = Divisor.countr_zero();
5199 if (Shift) {
5200 Divisor.ashrInPlace(Shift);
5201 UseSRA = true;
5202 }
5203
5204 // Calculate the multiplicative inverse modulo BW.
5205 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5206 APInt Factor = Divisor.multiplicativeInverse();
5207 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5208 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5209 return true;
5210 };
5211
5212 // Collect all magic values from the build vector.
5213 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5214 (void)Matched;
5215 assert(Matched && "Expected unary predicate match to succeed");
5216
5217 Register Shift, Factor;
5218 if (Ty.isVector()) {
5219 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5220 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5221 } else {
5222 Shift = Shifts[0];
5223 Factor = Factors[0];
5224 }
5225
5226 Register Res = LHS;
5227
5228 if (UseSRA)
5229 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5230
5231 return MIB.buildMul(Ty, Res, Factor);
5232}
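// [Editorial note, not part of the upstream source] The exact-divide path
// above works because, once trailing zeros are shifted out, the divisor is odd
// and therefore invertible modulo 2^BW. For example, an exact 32-bit G_SDIV
// by 6 uses Shift = 1 and the reduced divisor 3, whose inverse is 0xAAAAAAAB
// (3 * 0xAAAAAAAB == 1 mod 2^32), giving (x ashr exact 1) * 0xAAAAAAAB.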
5233
5234 bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) {
5235 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5236 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5237 "Expected SDIV or UDIV");
5238 auto &Div = cast<GenericMachineInstr>(MI);
5239 Register RHS = Div.getReg(2);
5240 auto MatchPow2 = [&](const Constant *C) {
5241 auto *CI = dyn_cast<ConstantInt>(C);
5242 return CI && (CI->getValue().isPowerOf2() ||
5243 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5244 };
5245 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5246}
5247
5248 void CombinerHelper::applySDivByPow2(MachineInstr &MI) {
5249 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5250 auto &SDiv = cast<GenericMachineInstr>(MI);
5251 Register Dst = SDiv.getReg(0);
5252 Register LHS = SDiv.getReg(1);
5253 Register RHS = SDiv.getReg(2);
5254 LLT Ty = MRI.getType(Dst);
5255 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5256 LLT CCVT =
5257 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5258
5259 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5260 // to the following version:
5261 //
5262 // %c1 = G_CTTZ %rhs
5263 // %inexact = G_SUB $bitwidth, %c1
5264 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5265 // %lshr = G_LSHR %sign, %inexact
5266 // %add = G_ADD %lhs, %lshr
5267 // %ashr = G_ASHR %add, %c1
5268 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5269 // %zero = G_CONSTANT $0
5270 // %neg = G_NEG %ashr
5271 // %isneg = G_ICMP SLT %rhs, %zero
5272 // %res = G_SELECT %isneg, %neg, %ashr
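//
// Illustrative example (editorial, not from the upstream source): for a 32-bit
// %lhs and %rhs = 8, %c1 = 3 and %inexact = 29, so %lshr is 7 when %lhs is
// negative and 0 otherwise. Adding that bias before the arithmetic shift by 3
// makes the division round towards zero, as required for sdiv.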
5273
5274 unsigned BitWidth = Ty.getScalarSizeInBits();
5275 auto Zero = Builder.buildConstant(Ty, 0);
5276
5277 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5278 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5279 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5280 // Splat the sign bit into the register
5281 auto Sign = Builder.buildAShr(
5282 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5283
5284 // Add (LHS < 0) ? abs(divisor) - 1 : 0;
5285 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5286 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5287 auto AShr = Builder.buildAShr(Ty, Add, C1);
5288
5289 // Special case: (sdiv X, 1) -> X
5290 // Special Case: (sdiv X, -1) -> 0-X
5291 auto One = Builder.buildConstant(Ty, 1);
5292 auto MinusOne = Builder.buildConstant(Ty, -1);
5293 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5294 auto IsMinusOne =
5295 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5296 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5297 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5298
5299 // If divided by a positive value, we're done. Otherwise, the result must be
5300 // negated.
5301 auto Neg = Builder.buildNeg(Ty, AShr);
5302 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5303 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5304 MI.eraseFromParent();
5305}
5306
5307 void CombinerHelper::applyUDivByPow2(MachineInstr &MI) {
5308 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5309 auto &UDiv = cast<GenericMachineInstr>(MI);
5310 Register Dst = UDiv.getReg(0);
5311 Register LHS = UDiv.getReg(1);
5312 Register RHS = UDiv.getReg(2);
5313 LLT Ty = MRI.getType(Dst);
5314 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5315
5316 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5317 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5318 MI.eraseFromParent();
5319}
5320
5321 bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
5322 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5323 Register RHS = MI.getOperand(2).getReg();
5324 Register Dst = MI.getOperand(0).getReg();
5325 LLT Ty = MRI.getType(Dst);
5326 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5327 auto MatchPow2ExceptOne = [&](const Constant *C) {
5328 if (auto *CI = dyn_cast<ConstantInt>(C))
5329 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5330 return false;
5331 };
5332 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5333 return false;
5334 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
5335}
5336
5337 void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
5338 Register LHS = MI.getOperand(1).getReg();
5339 Register RHS = MI.getOperand(2).getReg();
5340 Register Dst = MI.getOperand(0).getReg();
5341 LLT Ty = MRI.getType(Dst);
5342 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5343 unsigned NumEltBits = Ty.getScalarSizeInBits();
5344
5345 auto LogBase2 = buildLogBase2(RHS, Builder);
5346 auto ShiftAmt =
5347 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5348 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5349 Builder.buildLShr(Dst, LHS, Trunc);
5350 MI.eraseFromParent();
5351}
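// [Editorial note, not part of the upstream source] For a 32-bit G_UMULH by
// the constant 8, LogBase2 is 3 and the shift amount is 32 - 3 = 29, i.e.
// umulh(x, 8) == (x * 8) >> 32 == x >> 29.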
5352
5353 bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
5354 BuildFnTy &MatchInfo) {
5355 unsigned Opc = MI.getOpcode();
5356 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
5357 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5358 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
5359
5360 Register Dst = MI.getOperand(0).getReg();
5361 Register X = MI.getOperand(1).getReg();
5362 Register Y = MI.getOperand(2).getReg();
5363 LLT Type = MRI.getType(Dst);
5364
5365 // fold (fadd x, fneg(y)) -> (fsub x, y)
5366 // fold (fadd fneg(y), x) -> (fsub x, y)
5367 // G_FADD is commutative so both cases are checked by m_GFAdd
5368 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5369 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
5370 Opc = TargetOpcode::G_FSUB;
5371 }
5372 /// fold (fsub x, fneg(y)) -> (fadd x, y)
5373 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5374 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
5375 Opc = TargetOpcode::G_FADD;
5376 }
5377 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
5378 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
5379 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
5380 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
5381 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5382 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
5383 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
5384 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
5385 // no opcode change
5386 } else
5387 return false;
5388
5389 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5390 Observer.changingInstr(MI);
5391 MI.setDesc(B.getTII().get(Opc));
5392 MI.getOperand(1).setReg(X);
5393 MI.getOperand(2).setReg(Y);
5394 Observer.changedInstr(MI);
5395 };
5396 return true;
5397}
5398
5399 bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5400 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5401
5402 Register LHS = MI.getOperand(1).getReg();
5403 MatchInfo = MI.getOperand(2).getReg();
5404 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
5405
5406 const auto LHSCst = Ty.isVector()
5407 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
5408 : getFConstantVRegValWithLookThrough(LHS, MRI);
5409 if (!LHSCst)
5410 return false;
5411
5412 // -0.0 is always allowed
5413 if (LHSCst->Value.isNegZero())
5414 return true;
5415
5416 // +0.0 is only allowed if nsz is set.
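// (Editorial note: without nsz, folding (fsub +0.0, X) into (fneg X) would be
// wrong for X == +0.0, because 0.0 - 0.0 is +0.0 while fneg(+0.0) is -0.0.)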
5417 if (LHSCst->Value.isPosZero())
5418 return MI.getFlag(MachineInstr::FmNsz);
5419
5420 return false;
5421}
5422
5423 void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5424 Register Dst = MI.getOperand(0).getReg();
5425 Builder.buildFNeg(
5426 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
5427 eraseInst(MI);
5428}
5429
5430/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
5431/// due to global flags or MachineInstr flags.
5432static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
5433 if (MI.getOpcode() != TargetOpcode::G_FMUL)
5434 return false;
5435 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
5436}
5437
5438static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
5439 const MachineRegisterInfo &MRI) {
5440 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
5441 MRI.use_instr_nodbg_end()) >
5442 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
5443 MRI.use_instr_nodbg_end());
5444}
5445
5446 bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
5447 bool &AllowFusionGlobally,
5448 bool &HasFMAD, bool &Aggressive,
5449 bool CanReassociate) {
5450
5451 auto *MF = MI.getMF();
5452 const auto &TLI = *MF->getSubtarget().getTargetLowering();
5453 const TargetOptions &Options = MF->getTarget().Options;
5454 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5455
5456 if (CanReassociate &&
5457 !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
5458 return false;
5459
5460 // Floating-point multiply-add with intermediate rounding.
5461 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
5462 // Floating-point multiply-add without intermediate rounding.
5463 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
5464 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
5465 // No valid opcode, do not combine.
5466 if (!HasFMAD && !HasFMA)
5467 return false;
5468
5469 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
5470 Options.UnsafeFPMath || HasFMAD;
5471 // If the addition is not contractable, do not combine.
5472 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
5473 return false;
5474
5475 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
5476 return true;
5477}
5478
5479 bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
5480 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5481 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5482
5483 bool AllowFusionGlobally, HasFMAD, Aggressive;
5484 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5485 return false;
5486
5487 Register Op1 = MI.getOperand(1).getReg();
5488 Register Op2 = MI.getOperand(2).getReg();
5489 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5490 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5491 unsigned PreferredFusedOpcode =
5492 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5493
5494 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5495 // prefer to fold the multiply with fewer uses.
5496 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5497 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5498 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5499 std::swap(LHS, RHS);
5500 }
5501
5502 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
5503 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5504 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
5505 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5506 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5507 {LHS.MI->getOperand(1).getReg(),
5508 LHS.MI->getOperand(2).getReg(), RHS.Reg});
5509 };
5510 return true;
5511 }
5512
5513 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
5514 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5515 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
5516 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5517 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5518 {RHS.MI->getOperand(1).getReg(),
5519 RHS.MI->getOperand(2).getReg(), LHS.Reg});
5520 };
5521 return true;
5522 }
5523
5524 return false;
5525}
5526
5527 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
5528 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5529 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5530
5531 bool AllowFusionGlobally, HasFMAD, Aggressive;
5532 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5533 return false;
5534
5535 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5536 Register Op1 = MI.getOperand(1).getReg();
5537 Register Op2 = MI.getOperand(2).getReg();
5538 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5539 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5540 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5541
5542 unsigned PreferredFusedOpcode =
5543 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5544
5545 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5546 // prefer to fold the multiply with fewer uses.
5547 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5548 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5549 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5550 std::swap(LHS, RHS);
5551 }
5552
5553 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
5554 MachineInstr *FpExtSrc;
5555 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5556 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5557 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5558 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5559 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5560 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5561 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5562 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5563 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
5564 };
5565 return true;
5566 }
5567
5568 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
5569 // Note: Commutes FADD operands.
5570 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5571 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5572 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5573 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5574 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5575 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5576 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5577 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5578 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
5579 };
5580 return true;
5581 }
5582
5583 return false;
5584}
5585
5586 bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
5587 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5588 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5589
5590 bool AllowFusionGlobally, HasFMAD, Aggressive;
5591 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
5592 return false;
5593
5594 Register Op1 = MI.getOperand(1).getReg();
5595 Register Op2 = MI.getOperand(2).getReg();
5596 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5597 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5598 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5599
5600 unsigned PreferredFusedOpcode =
5601 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5602
5603 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5604 // prefer to fold the multiply with fewer uses.
5605 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5606 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5607 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5608 std::swap(LHS, RHS);
5609 }
5610
5611 MachineInstr *FMA = nullptr;
5612 Register Z;
5613 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
5614 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5615 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
5616 TargetOpcode::G_FMUL) &&
5617 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
5618 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
5619 FMA = LHS.MI;
5620 Z = RHS.Reg;
5621 }
5622 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
5623 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5624 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
5625 TargetOpcode::G_FMUL) &&
5626 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
5627 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
5628 Z = LHS.Reg;
5629 FMA = RHS.MI;
5630 }
5631
5632 if (FMA) {
5633 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
5634 Register X = FMA->getOperand(1).getReg();
5635 Register Y = FMA->getOperand(2).getReg();
5636 Register U = FMulMI->getOperand(1).getReg();
5637 Register V = FMulMI->getOperand(2).getReg();
5638
5639 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5640 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
5641 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
5642 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5643 {X, Y, InnerFMA});
5644 };
5645 return true;
5646 }
5647
5648 return false;
5649}
5650
5651 bool CombinerHelper::matchCombineFAddFpExtFMAFMulToFMadOrFMAAggressive(
5652 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5653 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5654
5655 bool AllowFusionGlobally, HasFMAD, Aggressive;
5656 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5657 return false;
5658
5659 if (!Aggressive)
5660 return false;
5661
5662 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5663 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5664 Register Op1 = MI.getOperand(1).getReg();
5665 Register Op2 = MI.getOperand(2).getReg();
5666 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5667 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5668
5669 unsigned PreferredFusedOpcode =
5670 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5671
5672 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5673 // prefer to fold the multiply with fewer uses.
5674 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5675 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5676 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5677 std::swap(LHS, RHS);
5678 }
5679
5680 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
5681 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
5682 Register Y, MachineIRBuilder &B) {
5683 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
5684 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
5685 Register InnerFMA =
5686 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
5687 .getReg(0);
5688 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5689 {X, Y, InnerFMA});
5690 };
5691
5692 MachineInstr *FMulMI, *FMAMI;
5693 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
5694 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5695 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5696 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
5697 m_GFPExt(m_MInstr(FMulMI))) &&
5698 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5699 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5700 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5701 MatchInfo = [=](MachineIRBuilder &B) {
5702 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5703 FMulMI->getOperand(2).getReg(), RHS.Reg,
5704 LHS.MI->getOperand(1).getReg(),
5705 LHS.MI->getOperand(2).getReg(), B);
5706 };
5707 return true;
5708 }
5709
5710 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
5711 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5712 // FIXME: This turns two single-precision and one double-precision
5713 // operation into two double-precision operations, which might not be
5714 // interesting for all targets, especially GPUs.
5715 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5716 FMAMI->getOpcode() == PreferredFusedOpcode) {
5717 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5718 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5719 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5720 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5721 MatchInfo = [=](MachineIRBuilder &B) {
5722 Register X = FMAMI->getOperand(1).getReg();
5723 Register Y = FMAMI->getOperand(2).getReg();
5724 X = B.buildFPExt(DstType, X).getReg(0);
5725 Y = B.buildFPExt(DstType, Y).getReg(0);
5726 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5727 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
5728 };
5729
5730 return true;
5731 }
5732 }
5733
5734 // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
5735 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5736 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5737 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
5738 m_GFPExt(m_MInstr(FMulMI))) &&
5739 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5740 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5741 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5742 MatchInfo = [=](MachineIRBuilder &B) {
5743 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5744 FMulMI->getOperand(2).getReg(), LHS.Reg,
5745 RHS.MI->getOperand(1).getReg(),
5746 RHS.MI->getOperand(2).getReg(), B);
5747 };
5748 return true;
5749 }
5750
5751 // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
5752 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5753 // FIXME: This turns two single-precision and one double-precision
5754 // operation into two double-precision operations, which might not be
5755 // interesting for all targets, especially GPUs.
5756 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5757 FMAMI->getOpcode() == PreferredFusedOpcode) {
5758 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5759 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5760 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5761 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5762 MatchInfo = [=](MachineIRBuilder &B) {
5763 Register X = FMAMI->getOperand(1).getReg();
5764 Register Y = FMAMI->getOperand(2).getReg();
5765 X = B.buildFPExt(DstType, X).getReg(0);
5766 Y = B.buildFPExt(DstType, Y).getReg(0);
5767 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5768 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
5769 };
5770 return true;
5771 }
5772 }
5773
5774 return false;
5775}
5776
5777 bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
5778 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5779 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5780
5781 bool AllowFusionGlobally, HasFMAD, Aggressive;
5782 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5783 return false;
5784
5785 Register Op1 = MI.getOperand(1).getReg();
5786 Register Op2 = MI.getOperand(2).getReg();
5787 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5788 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5789 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5790
5791 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5792 // prefer to fold the multiply with fewer uses.
5793 int FirstMulHasFewerUses = true;
5794 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5795 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5796 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5797 FirstMulHasFewerUses = false;
5798
5799 unsigned PreferredFusedOpcode =
5800 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5801
5802 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
5803 if (FirstMulHasFewerUses &&
5804 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5805 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
5806 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5807 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
5808 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5809 {LHS.MI->getOperand(1).getReg(),
5810 LHS.MI->getOperand(2).getReg(), NegZ});
5811 };
5812 return true;
5813 }
5814 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
5815 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5816 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
5817 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5818 Register NegY =
5819 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
5820 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5821 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
5822 };
5823 return true;
5824 }
5825
5826 return false;
5827}
5828
5829 bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
5830 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5831 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5832
5833 bool AllowFusionGlobally, HasFMAD, Aggressive;
5834 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5835 return false;
5836
5837 Register LHSReg = MI.getOperand(1).getReg();
5838 Register RHSReg = MI.getOperand(2).getReg();
5839 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5840
5841 unsigned PreferredFusedOpcode =
5842 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5843
5844 MachineInstr *FMulMI;
5845 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
5846 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5847 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
5848 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5849 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5850 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5851 Register NegX =
5852 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5853 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
5854 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5855 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
5856 };
5857 return true;
5858 }
5859
5860 // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
5861 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5862 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
5863 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5864 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5865 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5866 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5867 {FMulMI->getOperand(1).getReg(),
5868 FMulMI->getOperand(2).getReg(), LHSReg});
5869 };
5870 return true;
5871 }
5872
5873 return false;
5874}
5875
5876 bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
5877 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5878 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5879
5880 bool AllowFusionGlobally, HasFMAD, Aggressive;
5881 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5882 return false;
5883
5884 Register LHSReg = MI.getOperand(1).getReg();
5885 Register RHSReg = MI.getOperand(2).getReg();
5886 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5887
5888 unsigned PreferredFusedOpcode =
5889 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5890
5891 MachineInstr *FMulMI;
5892 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
5893 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
5894 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5895 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
5896 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5897 Register FpExtX =
5898 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5899 Register FpExtY =
5900 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
5901 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
5902 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5903 {FpExtX, FpExtY, NegZ});
5904 };
5905 return true;
5906 }
5907
5908 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
5909 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
5910 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5911 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
5912 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5913 Register FpExtY =
5914 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5915 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
5916 Register FpExtZ =
5917 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
5918 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5919 {NegY, FpExtZ, LHSReg});
5920 };
5921 return true;
5922 }
5923
5924 return false;
5925}
5926
5927 bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
5928 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5929 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5930
5931 bool AllowFusionGlobally, HasFMAD, Aggressive;
5932 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5933 return false;
5934
5935 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5936 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5937 Register LHSReg = MI.getOperand(1).getReg();
5938 Register RHSReg = MI.getOperand(2).getReg();
5939
5940 unsigned PreferredFusedOpcode =
5941 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5942
5943 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
5944 MachineIRBuilder &B) {
5945 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
5946 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
5947 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
5948 };
5949
5950 MachineInstr *FMulMI;
5951 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
5952 // (fneg (fma (fpext x), (fpext y), z))
5953 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
5954 // (fneg (fma (fpext x), (fpext y), z))
5955 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
5956 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
5957 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5958 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
5959 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5960 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5961 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
5962 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
5963 FMulMI->getOperand(2).getReg(), RHSReg, B);
5964 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
5965 };
5966 return true;
5967 }
5968
5969 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
5970 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
5971 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
5972 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
5973 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5974 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
5975 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5976 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5977 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
5978 FMulMI->getOperand(2).getReg(), LHSReg, B);
5979 };
5980 return true;
5981 }
5982
5983 return false;
5984}
5985
5986 bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
5987 unsigned &IdxToPropagate) {
5988 bool PropagateNaN;
5989 switch (MI.getOpcode()) {
5990 default:
5991 return false;
5992 case TargetOpcode::G_FMINNUM:
5993 case TargetOpcode::G_FMAXNUM:
5994 PropagateNaN = false;
5995 break;
5996 case TargetOpcode::G_FMINIMUM:
5997 case TargetOpcode::G_FMAXIMUM:
5998 PropagateNaN = true;
5999 break;
6000 }
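// Editorial note: e.g. G_FMAXNUM(x, NaN) folds to x (the non-NaN operand),
// whereas G_FMAXIMUM(x, NaN) folds to the NaN operand.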
6001
6002 auto MatchNaN = [&](unsigned Idx) {
6003 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6004 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6005 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6006 return false;
6007 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6008 return true;
6009 };
6010
6011 return MatchNaN(1) || MatchNaN(2);
6012}
6013
6014 bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
6015 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6016 Register LHS = MI.getOperand(1).getReg();
6017 Register RHS = MI.getOperand(2).getReg();
6018
6019 // Helper lambda to check for opportunities for
6020 // A + (B - A) -> B
6021 // (B - A) + A -> B
6022 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6023 Register Reg;
6024 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6025 Reg == MaybeSameReg;
6026 };
6027 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6028}
6029
6030 bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
6031 Register &MatchInfo) {
6032 // This combine folds the following patterns:
6033 //
6034 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6035 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6036 // into
6037 // x
6038 // if
6039 // k == sizeof(VecEltTy)/2
6040 // type(x) == type(dst)
6041 //
6042 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6043 // into
6044 // x
6045 // if
6046 // type(x) == type(dst)
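//
// Editorial example (not from the upstream source): with x : <2 x s32>,
// G_BITCAST x yields an s64 whose element 0 occupies the low 32 bits (assuming
// the usual element-0-in-low-bits mapping), so rebuilding the vector from the
// bitcast and its 32-bit G_LSHR reproduces x exactly.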
6047
6048 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6049 LLT DstEltTy = DstVecTy.getElementType();
6050
6051 Register Lo, Hi;
6052
6053 if (mi_match(
6054 MI, MRI,
6055 m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
6056 MatchInfo = Lo;
6057 return MRI.getType(MatchInfo) == DstVecTy;
6058 }
6059
6060 std::optional<ValueAndVReg> ShiftAmount;
6061 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6062 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6063 if (mi_match(
6064 MI, MRI,
6065 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6066 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6067 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6068 MatchInfo = Lo;
6069 return MRI.getType(MatchInfo) == DstVecTy;
6070 }
6071 }
6072
6073 return false;
6074}
6075
6076 bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
6077 Register &MatchInfo) {
6078 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6079 // if type(x) == type(G_TRUNC)
6080 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6081 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6082 return false;
6083
6084 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6085}
6086
6087 bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
6088 Register &MatchInfo) {
6089 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6090 // y if K == size of vector element type
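// (Editorial example: for x, y : s32 and K == 32, the G_LSHR moves y into the
// low 32 bits of the bitcast, so the G_TRUNC yields y, assuming the usual
// element-0-in-low-bits mapping of the bitcast.)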
6091 std::optional<ValueAndVReg> ShiftAmt;
6092 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6093 m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
6094 m_GCst(ShiftAmt))))
6095 return false;
6096
6097 LLT MatchTy = MRI.getType(MatchInfo);
6098 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6099 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6100}
6101
6102unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6103 CmpInst::Predicate Pred, LLT DstTy,
6104 SelectPatternNaNBehaviour VsNaNRetVal) const {
6105 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6106 "Expected a NaN behaviour?");
6107 // Choose an opcode based off of legality or the behaviour when one of the
6108 // LHS/RHS may be NaN.
6109 switch (Pred) {
6110 default:
6111 return 0;
6112 case CmpInst::FCMP_UGT:
6113 case CmpInst::FCMP_UGE:
6114 case CmpInst::FCMP_OGT:
6115 case CmpInst::FCMP_OGE:
6116 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6117 return TargetOpcode::G_FMAXNUM;
6118 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6119 return TargetOpcode::G_FMAXIMUM;
6120 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6121 return TargetOpcode::G_FMAXNUM;
6122 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6123 return TargetOpcode::G_FMAXIMUM;
6124 return 0;
6125 case CmpInst::FCMP_ULT:
6126 case CmpInst::FCMP_ULE:
6127 case CmpInst::FCMP_OLT:
6128 case CmpInst::FCMP_OLE:
6129 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6130 return TargetOpcode::G_FMINNUM;
6131 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6132 return TargetOpcode::G_FMINIMUM;
6133 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6134 return TargetOpcode::G_FMINNUM;
6135 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6136 return 0;
6137 return TargetOpcode::G_FMINIMUM;
6138 }
6139}
6140
6141CombinerHelper::SelectPatternNaNBehaviour
6142CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6143 bool IsOrderedComparison) const {
6144 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6145 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6146 // Completely unsafe.
6147 if (!LHSSafe && !RHSSafe)
6148 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6149 if (LHSSafe && RHSSafe)
6150 return SelectPatternNaNBehaviour::RETURNS_ANY;
6151 // An ordered comparison will return false when given a NaN, so it
6152 // returns the RHS.
6153 if (IsOrderedComparison)
6154 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6155 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6156 // An unordered comparison will return true when given a NaN, so it
6157 // returns the LHS.
6158 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6159 : SelectPatternNaNBehaviour::RETURNS_NAN;
6160}
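// Editorial note: e.g. for select (fcmp olt x, y), x, y with y known non-NaN,
// a NaN x makes the ordered compare false and the select returns y, i.e. the
// other operand (RETURNS_OTHER), matching G_FMINNUM semantics.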
6161
6162bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6163 Register TrueVal, Register FalseVal,
6164 BuildFnTy &MatchInfo) {
6165 // Match: select (fcmp cond x, y) x, y
6166 // select (fcmp cond x, y) y, x
6167 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
6168 LLT DstTy = MRI.getType(Dst);
6169 // Bail out early on pointers, since we'll never want to fold to a min/max.
6170 if (DstTy.isPointer())
6171 return false;
6172 // Match a floating point compare with a less-than/greater-than predicate.
6173 // TODO: Allow multiple users of the compare if they are all selects.
6174 CmpInst::Predicate Pred;
6175 Register CmpLHS, CmpRHS;
6176 if (!mi_match(Cond, MRI,
6177 m_OneNonDBGUse(
6178 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6179 CmpInst::isEquality(Pred))
6180 return false;
6181 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6182 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6183 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6184 return false;
6185 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6186 std::swap(CmpLHS, CmpRHS);
6187 Pred = CmpInst::getSwappedPredicate(Pred);
6188 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6189 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6190 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6191 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6192 }
6193 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6194 return false;
6195 // Decide what type of max/min this should be based off of the predicate.
6196 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6197 if (!Opc || !isLegal({Opc, {DstTy}}))
6198 return false;
6199 // Comparisons between signed zero and zero may have different results...
6200 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6201 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6202 // We don't know if a comparison between two 0s will give us a consistent
6203 // result. Be conservative and only proceed if at least one side is
6204 // non-zero.
6205 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6206 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6207 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6208 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6209 return false;
6210 }
6211 }
6212 MatchInfo = [=](MachineIRBuilder &B) {
6213 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6214 };
6215 return true;
6216}
6217
6218 bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
6219 BuildFnTy &MatchInfo) {
6220 // TODO: Handle integer cases.
6221 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6222 // Condition may be fed by a truncated compare.
6223 Register Cond = MI.getOperand(1).getReg();
6224 Register MaybeTrunc;
6225 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6226 Cond = MaybeTrunc;
6227 Register Dst = MI.getOperand(0).getReg();
6228 Register TrueVal = MI.getOperand(2).getReg();
6229 Register FalseVal = MI.getOperand(3).getReg();
6230 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6231}
6232
6233 bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
6234 BuildFnTy &MatchInfo) {
6235 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6236 // (X + Y) == X --> Y == 0
6237 // (X + Y) != X --> Y != 0
6238 // (X - Y) == X --> Y == 0
6239 // (X - Y) != X --> Y != 0
6240 // (X ^ Y) == X --> Y == 0
6241 // (X ^ Y) != X --> Y != 0
6242 Register Dst = MI.getOperand(0).getReg();
6243 CmpInst::Predicate Pred;
6244 Register X, Y, OpLHS, OpRHS;
6245 bool MatchedSub = mi_match(
6246 Dst, MRI,
6247 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6248 if (MatchedSub && X != OpLHS)
6249 return false;
6250 if (!MatchedSub) {
6251 if (!mi_match(Dst, MRI,
6252 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6253 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6254 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6255 return false;
6256 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6257 }
6258 MatchInfo = [=](MachineIRBuilder &B) {
6259 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6260 B.buildICmp(Pred, Dst, Y, Zero);
6261 };
6262 return CmpInst::isEquality(Pred) && Y.isValid();
6263}
6264
6265 bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
6266 Register ShiftReg = MI.getOperand(2).getReg();
6267 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
6268 auto IsShiftTooBig = [&](const Constant *C) {
6269 auto *CI = dyn_cast<ConstantInt>(C);
6270 return CI && CI->uge(ResTy.getScalarSizeInBits());
6271 };
6272 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
6273}
6274
6275 bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
6276 unsigned LHSOpndIdx = 1;
6277 unsigned RHSOpndIdx = 2;
6278 switch (MI.getOpcode()) {
6279 case TargetOpcode::G_UADDO:
6280 case TargetOpcode::G_SADDO:
6281 case TargetOpcode::G_UMULO:
6282 case TargetOpcode::G_SMULO:
6283 LHSOpndIdx = 2;
6284 RHSOpndIdx = 3;
6285 break;
6286 default:
6287 break;
6288 }
6289 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
6290 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
6291 if (!getIConstantVRegVal(LHS, MRI)) {
6292 // Skip commuting if LHS is not a constant. But, LHS may be a
6293 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
6294 // have a constant on the RHS.
6295 if (MRI.getVRegDef(LHS)->getOpcode() !=
6296 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
6297 return false;
6298 }
6299 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
6300 return MRI.getVRegDef(RHS)->getOpcode() !=
6301 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
6302 !getIConstantVRegVal(RHS, MRI);
6303}
6304
6305 bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
6306 Register LHS = MI.getOperand(1).getReg();
6307 Register RHS = MI.getOperand(2).getReg();
6308 std::optional<FPValueAndVReg> ValAndVReg;
6309 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
6310 return false;
6311 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
6312}
6313
6314 void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
6315 Observer.changingInstr(MI);
6316 unsigned LHSOpndIdx = 1;
6317 unsigned RHSOpndIdx = 2;
6318 switch (MI.getOpcode()) {
6319 case TargetOpcode::G_UADDO:
6320 case TargetOpcode::G_SADDO:
6321 case TargetOpcode::G_UMULO:
6322 case TargetOpcode::G_SMULO:
6323 LHSOpndIdx = 2;
6324 RHSOpndIdx = 3;
6325 break;
6326 default:
6327 break;
6328 }
6329 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
6330 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
6331 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
6332 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
6333 Observer.changedInstr(MI);
6334}
6335
6336bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
6337 LLT SrcTy = MRI.getType(Src);
6338 if (SrcTy.isFixedVector())
6339 return isConstantSplatVector(Src, 1, AllowUndefs);
6340 if (SrcTy.isScalar()) {
6341 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6342 return true;
6343 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6344 return IConstant && IConstant->Value == 1;
6345 }
6346 return false; // scalable vector
6347}
6348
6349bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
6350 LLT SrcTy = MRI.getType(Src);
6351 if (SrcTy.isFixedVector())
6352 return isConstantSplatVector(Src, 0, AllowUndefs);
6353 if (SrcTy.isScalar()) {
6354 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6355 return true;
6356 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6357 return IConstant && IConstant->Value == 0;
6358 }
6359 return false; // scalable vector
6360}
6361
6362// Ignores COPYs during conformance checks.
6363// FIXME scalable vectors.
6364bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
6365 bool AllowUndefs) {
6366 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6367 if (!BuildVector)
6368 return false;
6369 unsigned NumSources = BuildVector->getNumSources();
6370
6371 for (unsigned I = 0; I < NumSources; ++I) {
6372 GImplicitDef *ImplicitDef =
6373 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
6374 if (ImplicitDef && AllowUndefs)
6375 continue;
6376 if (ImplicitDef && !AllowUndefs)
6377 return false;
6378 std::optional<ValueAndVReg> IConstant =
6379 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6380 if (IConstant && IConstant->Value == SplatValue)
6381 continue;
6382 return false;
6383 }
6384 return true;
6385}
6386
6387// Ignores COPYs during lookups.
6388// FIXME scalable vectors
6389std::optional<APInt>
6390CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
6391 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6392 if (IConstant)
6393 return IConstant->Value;
6394
6395 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6396 if (!BuildVector)
6397 return std::nullopt;
6398 unsigned NumSources = BuildVector->getNumSources();
6399
6400 std::optional<APInt> Value = std::nullopt;
6401 for (unsigned I = 0; I < NumSources; ++I) {
6402 std::optional<ValueAndVReg> IConstant =
6403 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6404 if (!IConstant)
6405 return std::nullopt;
6406 if (!Value)
6407 Value = IConstant->Value;
6408 else if (*Value != IConstant->Value)
6409 return std::nullopt;
6410 }
6411 return Value;
6412}
6413
6414// FIXME G_SPLAT_VECTOR
6415bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
6416 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6417 if (IConstant)
6418 return true;
6419
6420 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6421 if (!BuildVector)
6422 return false;
6423
6424 unsigned NumSources = BuildVector->getNumSources();
6425 for (unsigned I = 0; I < NumSources; ++I) {
6426 std::optional<ValueAndVReg> IConstant =
6427 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6428 if (!IConstant)
6429 return false;
6430 }
6431 return true;
6432}
6433
6434// TODO: use knownbits to determine zeros
6435bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
6436 BuildFnTy &MatchInfo) {
6437 uint32_t Flags = Select->getFlags();
6438 Register Dest = Select->getReg(0);
6439 Register Cond = Select->getCondReg();
6440 Register True = Select->getTrueReg();
6441 Register False = Select->getFalseReg();
6442 LLT CondTy = MRI.getType(Select->getCondReg());
6443 LLT TrueTy = MRI.getType(Select->getTrueReg());
6444
6445 // We only do this combine for scalar boolean conditions.
6446 if (CondTy != LLT::scalar(1))
6447 return false;
6448
6449 if (TrueTy.isPointer())
6450 return false;
6451
6452 // Both are scalars.
6453 std::optional<ValueAndVReg> TrueOpt =
6454 getIConstantVRegValWithLookThrough(True, MRI);
6455 std::optional<ValueAndVReg> FalseOpt =
6456 getIConstantVRegValWithLookThrough(False, MRI);
6457
6458 if (!TrueOpt || !FalseOpt)
6459 return false;
6460
6461 APInt TrueValue = TrueOpt->Value;
6462 APInt FalseValue = FalseOpt->Value;
6463
6464 // select Cond, 1, 0 --> zext (Cond)
6465 if (TrueValue.isOne() && FalseValue.isZero()) {
6466 MatchInfo = [=](MachineIRBuilder &B) {
6467 B.setInstrAndDebugLoc(*Select);
6468 B.buildZExtOrTrunc(Dest, Cond);
6469 };
6470 return true;
6471 }
6472
6473 // select Cond, -1, 0 --> sext (Cond)
6474 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
6475 MatchInfo = [=](MachineIRBuilder &B) {
6476 B.setInstrAndDebugLoc(*Select);
6477 B.buildSExtOrTrunc(Dest, Cond);
6478 };
6479 return true;
6480 }
6481
6482 // select Cond, 0, 1 --> zext (!Cond)
6483 if (TrueValue.isZero() && FalseValue.isOne()) {
6484 MatchInfo = [=](MachineIRBuilder &B) {
6485 B.setInstrAndDebugLoc(*Select);
6486 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6487 B.buildNot(Inner, Cond);
6488 B.buildZExtOrTrunc(Dest, Inner);
6489 };
6490 return true;
6491 }
6492
6493 // select Cond, 0, -1 --> sext (!Cond)
6494 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
6495 MatchInfo = [=](MachineIRBuilder &B) {
6496 B.setInstrAndDebugLoc(*Select);
6497 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6498 B.buildNot(Inner, Cond);
6499 B.buildSExtOrTrunc(Dest, Inner);
6500 };
6501 return true;
6502 }
6503
6504 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
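// (e.g. select %c, 7, 6 --> add (zext %c), 6; editorial example)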
6505 if (TrueValue - 1 == FalseValue) {
6506 MatchInfo = [=](MachineIRBuilder &B) {
6507 B.setInstrAndDebugLoc(*Select);
6508 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6509 B.buildZExtOrTrunc(Inner, Cond);
6510 B.buildAdd(Dest, Inner, False);
6511 };
6512 return true;
6513 }
6514
6515 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6516 if (TrueValue + 1 == FalseValue) {
6517 MatchInfo = [=](MachineIRBuilder &B) {
6518 B.setInstrAndDebugLoc(*Select);
6519 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6520 B.buildSExtOrTrunc(Inner, Cond);
6521 B.buildAdd(Dest, Inner, False);
6522 };
6523 return true;
6524 }
6525
6526 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
6527 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
6528 MatchInfo = [=](MachineIRBuilder &B) {
6529 B.setInstrAndDebugLoc(*Select);
6530 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6531 B.buildZExtOrTrunc(Inner, Cond);
6532 // The shift amount must be scalar.
6533 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
6534 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
6535 B.buildShl(Dest, Inner, ShAmtC, Flags);
6536 };
6537 return true;
6538 }
6539 // select Cond, -1, C --> or (sext Cond), C
6540 if (TrueValue.isAllOnes()) {
6541 MatchInfo = [=](MachineIRBuilder &B) {
6542 B.setInstrAndDebugLoc(*Select);
6543 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6544 B.buildSExtOrTrunc(Inner, Cond);
6545 B.buildOr(Dest, Inner, False, Flags);
6546 };
6547 return true;
6548 }
6549
6550 // select Cond, C, -1 --> or (sext (not Cond)), C
6551 if (FalseValue.isAllOnes()) {
6552 MatchInfo = [=](MachineIRBuilder &B) {
6553 B.setInstrAndDebugLoc(*Select);
6554 Register Not = MRI.createGenericVirtualRegister(TrueTy);
6555 B.buildNot(Not, Cond);
6556 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6557 B.buildSExtOrTrunc(Inner, Not);
6558 B.buildOr(Dest, Inner, True, Flags);
6559 };
6560 return true;
6561 }
6562
6563 return false;
6564}
6565
6566// TODO: use knownbits to determine zeros
6567bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
6568 BuildFnTy &MatchInfo) {
6569 uint32_t Flags = Select->getFlags();
6570 Register DstReg = Select->getReg(0);
6571 Register Cond = Select->getCondReg();
6572 Register True = Select->getTrueReg();
6573 Register False = Select->getFalseReg();
6574 LLT CondTy = MRI.getType(Select->getCondReg());
6575 LLT TrueTy = MRI.getType(Select->getTrueReg());
6576
6577 // Boolean or fixed vector of booleans.
6578 if (CondTy.isScalableVector() ||
6579 (CondTy.isFixedVector() &&
6580 CondTy.getElementType().getScalarSizeInBits() != 1) ||
6581 CondTy.getScalarSizeInBits() != 1)
6582 return false;
6583
6584 if (CondTy != TrueTy)
6585 return false;
6586
6587 // select Cond, Cond, F --> or Cond, F
6588 // select Cond, 1, F --> or Cond, F
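// (Editorial note: False is frozen below because the original select never
// observes it when Cond is true, whereas the OR evaluates it unconditionally,
// so poison/undef in False must not be allowed to leak through.)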
6589 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
6590 MatchInfo = [=](MachineIRBuilder &B) {
6591 B.setInstrAndDebugLoc(*Select);
6592 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6593 B.buildZExtOrTrunc(Ext, Cond);
6594 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6595 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
6596 };
6597 return true;
6598 }
6599
6600 // select Cond, T, Cond --> and Cond, T
6601 // select Cond, T, 0 --> and Cond, T
6602 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
6603 MatchInfo = [=](MachineIRBuilder &B) {
6604 B.setInstrAndDebugLoc(*Select);
6605 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6606 B.buildZExtOrTrunc(Ext, Cond);
6607 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6608 B.buildAnd(DstReg, Ext, FreezeTrue);
6609 };
6610 return true;
6611 }
6612
6613 // select Cond, T, 1 --> or (not Cond), T
6614 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
6615 MatchInfo = [=](MachineIRBuilder &B) {
6616 B.setInstrAndDebugLoc(*Select);
6617 // First the not.
6618 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6619 B.buildNot(Inner, Cond);
6620 // Then an ext to match the destination register.
6621 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6622 B.buildZExtOrTrunc(Ext, Inner);
6623 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6624 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
6625 };
6626 return true;
6627 }
6628
6629 // select Cond, 0, F --> and (not Cond), F
6630 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
6631 MatchInfo = [=](MachineIRBuilder &B) {
6632 B.setInstrAndDebugLoc(*Select);
6633 // First the not.
6634 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6635 B.buildNot(Inner, Cond);
6636 // Then an ext to match the destination register.
6637 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6638 B.buildZExtOrTrunc(Ext, Inner);
6639 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6640 B.buildAnd(DstReg, Ext, FreezeFalse);
6641 };
6642 return true;
6643 }
6644
6645 return false;
6646}
6647
6648bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
6649 BuildFnTy &MatchInfo) {
6650 Register DstReg = Select->getReg(0);
6651 Register Cond = Select->getCondReg();
6652 Register True = Select->getTrueReg();
6653 Register False = Select->getFalseReg();
6654 LLT DstTy = MRI.getType(DstReg);
6655
6656 if (DstTy.isPointer())
6657 return false;
6658
6659 // We need a G_ICMP on the condition register.
6660 GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI);
6661 if (!Cmp)
6662 return false;
6663
6664 // We want to fold the icmp and replace the select.
6665 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
6666 return false;
6667
6668 CmpInst::Predicate Pred = Cmp->getCond();
6669 // We need a larger or smaller predicate for
6670 // canonicalization.
6671 if (CmpInst::isEquality(Pred))
6672 return false;
6673
6674 Register CmpLHS = Cmp->getLHSReg();
6675 Register CmpRHS = Cmp->getRHSReg();
6676
6677 // We can swap CmpLHS and CmpRHS for a higher hit rate.
6678 if (True == CmpRHS && False == CmpLHS) {
6679 std::swap(CmpLHS, CmpRHS);
6680 Pred = CmpInst::getSwappedPredicate(Pred);
6681 }
6682
6683 // (icmp X, Y) ? X : Y -> integer minmax.
6684 // see matchSelectPattern in ValueTracking.
6685 // Legality between G_SELECT and integer minmax can differ.
6686 if (True == CmpLHS && False == CmpRHS) {
6687 switch (Pred) {
6688 case ICmpInst::ICMP_UGT:
6689 case ICmpInst::ICMP_UGE: {
6690 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
6691 return false;
6692 MatchInfo = [=](MachineIRBuilder &B) {
6693 B.buildUMax(DstReg, True, False);
6694 };
6695 return true;
6696 }
6697 case ICmpInst::ICMP_SGT:
6698 case ICmpInst::ICMP_SGE: {
6699 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
6700 return false;
6701 MatchInfo = [=](MachineIRBuilder &B) {
6702 B.buildSMax(DstReg, True, False);
6703 };
6704 return true;
6705 }
6706 case ICmpInst::ICMP_ULT:
6707 case ICmpInst::ICMP_ULE: {
6708 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
6709 return false;
6710 MatchInfo = [=](MachineIRBuilder &B) {
6711 B.buildUMin(DstReg, True, False);
6712 };
6713 return true;
6714 }
6715 case ICmpInst::ICMP_SLT:
6716 case ICmpInst::ICMP_SLE: {
6717 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
6718 return false;
6719 MatchInfo = [=](MachineIRBuilder &B) {
6720 B.buildSMin(DstReg, True, False);
6721 };
6722 return true;
6723 }
6724 default:
6725 return false;
6726 }
6727 }
6728
6729 return false;
6730}
6731
6732 bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
6733 GSelect *Select = cast<GSelect>(&MI);
6734
6735 if (tryFoldSelectOfConstants(Select, MatchInfo))
6736 return true;
6737
6738 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
6739 return true;
6740
6741 if (tryFoldSelectToIntMinMax(Select, MatchInfo))
6742 return true;
6743
6744 return false;
6745}
6746
6747/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
6748/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
6749/// into a single comparison using range-based reasoning.
6750/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
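/// For example (editorial illustration), (icmp uge X, 5) && (icmp ult X, 10)
/// becomes icmp ult (add X, -5), 5, i.e. a single unsigned range check.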
6751bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
6752 BuildFnTy &MatchInfo) {
6753 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6754 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6755 Register DstReg = Logic->getReg(0);
6756 Register LHS = Logic->getLHSReg();
6757 Register RHS = Logic->getRHSReg();
6758 unsigned Flags = Logic->getFlags();
6759
6760 // We need a G_ICMP on the LHS register.
6761 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
6762 if (!Cmp1)
6763 return false;
6764
6765 // We need a G_ICMP on the RHS register.
6766 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
6767 if (!Cmp2)
6768 return false;
6769
6770 // We want to fold the icmps.
6771 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
6772 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
6773 return false;
6774
6775 APInt C1;
6776 APInt C2;
6777 std::optional<ValueAndVReg> MaybeC1 =
6778 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
6778 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
6779 if (!MaybeC1)
6780 return false;
6781 C1 = MaybeC1->Value;
6782
6783 std::optional<ValueAndVReg> MaybeC2 =
6784 getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
6785 if (!MaybeC2)
6786 return false;
6787 C2 = MaybeC2->Value;
6788
6789 Register R1 = Cmp1->getLHSReg();
6790 Register R2 = Cmp2->getLHSReg();
6791 CmpInst::Predicate Pred1 = Cmp1->getCond();
6792 CmpInst::Predicate Pred2 = Cmp2->getCond();
6793 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
6794 LLT CmpOperandTy = MRI.getType(R1);
6795
6796 if (CmpOperandTy.isPointer())
6797 return false;
6798
6799 // We build ands, adds, and constants of type CmpOperandTy.
6800 // They must be legal to build.
6801 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
6802 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
6803 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
6804 return false;
6805
6806 // Look through an add of a constant offset on R1, R2, or both operands. This
6807 // lets us turn the R + C' < C'' range idiom into a proper range.
6808 std::optional<APInt> Offset1;
6809 std::optional<APInt> Offset2;
6810 if (R1 != R2) {
6811 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
6812 std::optional<ValueAndVReg> MaybeOffset1 =
6813 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6814 if (MaybeOffset1) {
6815 R1 = Add->getLHSReg();
6816 Offset1 = MaybeOffset1->Value;
6817 }
6818 }
6819 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
6820 std::optional<ValueAndVReg> MaybeOffset2 =
6821 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6822 if (MaybeOffset2) {
6823 R2 = Add->getLHSReg();
6824 Offset2 = MaybeOffset2->Value;
6825 }
6826 }
6827 }
6828
6829 if (R1 != R2)
6830 return false;
6831
6832 // We calculate the icmp ranges, taking the optional offsets into account.
6833 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
6834 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
6835 if (Offset1)
6836 CR1 = CR1.subtract(*Offset1);
6837
6838 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
6839 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
6840 if (Offset2)
6841 CR2 = CR2.subtract(*Offset2);
6842
6843 bool CreateMask = false;
6844 APInt LowerDiff;
6845 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
6846 if (!CR) {
6847 // We need non-wrapping ranges.
6848 if (CR1.isWrappedSet() || CR2.isWrappedSet())
6849 return false;
6850
6851 // Check whether we have equal-size ranges that only differ by one bit.
6852 // In that case we can apply a mask to map one range onto the other.
6853 LowerDiff = CR1.getLower() ^ CR2.getLower();
6854 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
6855 APInt CR1Size = CR1.getUpper() - CR1.getLower();
6856 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
6857 CR1Size != CR2.getUpper() - CR2.getLower())
6858 return false;
6859
6860 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
6861 CreateMask = true;
6862 }
6863
6864 if (IsAnd)
6865 CR = CR->inverse();
6866
6867 CmpInst::Predicate NewPred;
6868 APInt NewC, Offset;
6869 CR->getEquivalentICmp(NewPred, NewC, Offset);
6870
6871 // We take the result type of one of the original icmps, CmpTy, for
6872 // the icmp to be built. The operand type, CmpOperandTy, is used for
6873 // the other instructions and constants to be built. The parameter and
6874 // result types are the same for add and and. CmpTy and the type of
6875 // DstReg might differ, which is why we zext or trunc the icmp result
6876 // into the destination register.
6877
6878 MatchInfo = [=](MachineIRBuilder &B) {
6879 if (CreateMask && Offset != 0) {
6880 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6881 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6882 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6883 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
6884 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6885 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6886 B.buildZExtOrTrunc(DstReg, ICmp);
6887 } else if (CreateMask && Offset == 0) {
6888 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6889 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6890 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6891 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
6892 B.buildZExtOrTrunc(DstReg, ICmp);
6893 } else if (!CreateMask && Offset != 0) {
6894 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6895 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
6896 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6897 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6898 B.buildZExtOrTrunc(DstReg, ICmp);
6899 } else if (!CreateMask && Offset == 0) {
6900 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6901 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
6902 B.buildZExtOrTrunc(DstReg, ICmp);
6903 } else {
6904 llvm_unreachable("unexpected configuration of CreateMask and Offset");
6905 }
6906 };
6907 return true;
6908}
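// Illustrative sketch (not part of the original file): for
// (icmp ult X, 10) || (icmp ult X, 20) the exact ranges are [0, 10) and
// [0, 20); their exact union is [0, 20), so the pair folds to a single
// icmp ult X, 20 that is zext'd or truncated into the destination. For the
// G_AND case the per-icmp ranges are built from the inverse predicates and
// the final union is inverted back; a nonzero Offset from getEquivalentICmp
// produces the G_ADD form above, and CreateMask produces the G_AND mask form.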
6909
6910bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
6911 BuildFnTy &MatchInfo) {
6912 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6913 Register DestReg = Logic->getReg(0);
6914 Register LHS = Logic->getLHSReg();
6915 Register RHS = Logic->getRHSReg();
6916 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6917
6918 // We need a compare on the LHS register.
6919 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
6920 if (!Cmp1)
6921 return false;
6922
6923 // We need a compare on the RHS register.
6924 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
6925 if (!Cmp2)
6926 return false;
6927
6928 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
6929 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
6930
6931 // We build one fcmp, want to fold the fcmps, replace the logic op,
6932 // and the fcmps must have the same shape.
6933 if (!isLegalOrBeforeLegalizer(
6934 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
6935 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
6936 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
6937 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
6938 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
6939 return false;
6940
6941 CmpInst::Predicate PredL = Cmp1->getCond();
6942 CmpInst::Predicate PredR = Cmp2->getCond();
6943 Register LHS0 = Cmp1->getLHSReg();
6944 Register LHS1 = Cmp1->getRHSReg();
6945 Register RHS0 = Cmp2->getLHSReg();
6946 Register RHS1 = Cmp2->getRHSReg();
6947
6948 if (LHS0 == RHS1 && LHS1 == RHS0) {
6949 // Swap RHS operands to match LHS.
6950 PredR = CmpInst::getSwappedPredicate(PredR);
6951 std::swap(RHS0, RHS1);
6952 }
6953
6954 if (LHS0 == RHS0 && LHS1 == RHS1) {
6955 // We determine the new predicate.
6956 unsigned CmpCodeL = getFCmpCode(PredL);
6957 unsigned CmpCodeR = getFCmpCode(PredR);
6958 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
6959 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
6960 MatchInfo = [=](MachineIRBuilder &B) {
6961 // The fcmp predicates fill the lower part of the enum.
6962 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
6963 if (Pred == FCmpInst::FCMP_FALSE &&
6964 isConstantLegalOrBeforeLegalizer(CmpTy)) {
6965 auto False = B.buildConstant(CmpTy, 0);
6966 B.buildZExtOrTrunc(DestReg, False);
6967 } else if (Pred == FCmpInst::FCMP_TRUE &&
6968 isConstantLegalOrBeforeLegalizer(CmpTy)) {
6969 auto True =
6970 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
6971 CmpTy.isVector() /*isVector*/,
6972 true /*isFP*/));
6973 B.buildZExtOrTrunc(DestReg, True);
6974 } else { // We take the predicate without predicate optimizations.
6975 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
6976 B.buildZExtOrTrunc(DestReg, Cmp);
6977 }
6978 };
6979 return true;
6980 }
6981
6982 return false;
6983}
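// Illustrative sketch (not part of the original file): the fcmp predicates
// act as 4-bit condition codes, so (fcmp olt X, Y) || (fcmp oeq X, Y) merges
// 0100 | 0001 = 0101, i.e. a single fcmp ole X, Y, while
// (fcmp olt X, Y) && (fcmp ogt X, Y) merges 0100 & 0010 = 0000 (FCMP_FALSE)
// and becomes a constant 0.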
6984
6985 bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
6986 GAnd *And = cast<GAnd>(&MI);
6987
6988 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
6989 return true;
6990
6991 if (tryFoldLogicOfFCmps(And, MatchInfo))
6992 return true;
6993
6994 return false;
6995}
6996
6997 bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
6998 GOr *Or = cast<GOr>(&MI);
6999
7000 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7001 return true;
7002
7003 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7004 return true;
7005
7006 return false;
7007}
7008
7009 bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
7010 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7011
7012 // Addo has no flags
7013 Register Dst = Add->getReg(0);
7014 Register Carry = Add->getReg(1);
7015 Register LHS = Add->getLHSReg();
7016 Register RHS = Add->getRHSReg();
7017 bool IsSigned = Add->isSigned();
7018 LLT DstTy = MRI.getType(Dst);
7019 LLT CarryTy = MRI.getType(Carry);
7020
7021 // Fold addo, if the carry is dead -> add, undef.
7022 if (MRI.use_nodbg_empty(Carry) &&
7023 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7024 MatchInfo = [=](MachineIRBuilder &B) {
7025 B.buildAdd(Dst, LHS, RHS);
7026 B.buildUndef(Carry);
7027 };
7028 return true;
7029 }
7030
7031 // Canonicalize constant to RHS.
7032 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7033 if (IsSigned) {
7034 MatchInfo = [=](MachineIRBuilder &B) {
7035 B.buildSAddo(Dst, Carry, RHS, LHS);
7036 };
7037 return true;
7038 }
7039 // !IsSigned
7040 MatchInfo = [=](MachineIRBuilder &B) {
7041 B.buildUAddo(Dst, Carry, RHS, LHS);
7042 };
7043 return true;
7044 }
7045
7046 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7047 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7048
7049 // Fold addo(c1, c2) -> c3, carry.
7050 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7051 isConstantLegalOrBeforeLegalizer(CarryTy)) {
7052 bool Overflow;
7053 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7054 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7055 MatchInfo = [=](MachineIRBuilder &B) {
7056 B.buildConstant(Dst, Result);
7057 B.buildConstant(Carry, Overflow);
7058 };
7059 return true;
7060 }
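// Illustrative sketch (not part of the original file): for a G_UADDO of the
// s8 constants 200 and 100, the sum 300 wraps to 44, so the fold above
// replaces it with the constants 44 (result) and 1 (carry).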
7061
7062 // Fold (addo x, 0) -> x, no carry
7063 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7064 MatchInfo = [=](MachineIRBuilder &B) {
7065 B.buildCopy(Dst, LHS);
7066 B.buildConstant(Carry, 0);
7067 };
7068 return true;
7069 }
7070
7071 // Given 2 constant operands whose sum does not overflow:
7072 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7073 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7074 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7075 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7076 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7077 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7078 std::optional<APInt> MaybeAddRHS =
7079 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7080 if (MaybeAddRHS) {
7081 bool Overflow;
7082 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7083 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7084 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7085 if (IsSigned) {
7086 MatchInfo = [=](MachineIRBuilder &B) {
7087 auto ConstRHS = B.buildConstant(DstTy, NewC);
7088 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7089 };
7090 return true;
7091 }
7092 // !IsSigned
7093 MatchInfo = [=](MachineIRBuilder &B) {
7094 auto ConstRHS = B.buildConstant(DstTy, NewC);
7095 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7096 };
7097 return true;
7098 }
7099 }
7100 };
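// Illustrative sketch (not part of the original file): for
// G_UADDO (G_ADD nuw %x, 10), 20 on s32, the constants 10 and 20 sum to 30
// without overflowing, so the fold above rebuilds it as G_UADDO %x, 30.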
7101
7102 // We try to combine addo to non-overflowing add.
7103 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7104 !isConstantLegalOrBeforeLegalizer(CarryTy))
7105 return false;
7106
7107 // We try to combine uaddo to non-overflowing add.
7108 if (!IsSigned) {
7109 ConstantRange CRLHS =
7110 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
7111 ConstantRange CRRHS =
7112 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
7113
7114 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7115 case ConstantRange::OverflowResult::MayOverflow:
7116 return false;
7117 case ConstantRange::OverflowResult::NeverOverflows: {
7118 MatchInfo = [=](MachineIRBuilder &B) {
7119 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7120 B.buildConstant(Carry, 0);
7121 };
7122 return true;
7123 }
7124 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7125 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7126 MatchInfo = [=](MachineIRBuilder &B) {
7127 B.buildAdd(Dst, LHS, RHS);
7128 B.buildConstant(Carry, 1);
7129 };
7130 return true;
7131 }
7132 }
7133 return false;
7134 }
7135
7136 // We try to combine saddo to non-overflowing add.
7137
7138 // If LHS and RHS each have at least two sign bits, then there is no signed
7139 // overflow.
7140 if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
7141 MatchInfo = [=](MachineIRBuilder &B) {
7142 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7143 B.buildConstant(Carry, 0);
7144 };
7145 return true;
7146 }
7147
7148 ConstantRange CRLHS =
7149 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
7150 ConstantRange CRRHS =
7151 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
7152
7153 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7154 case ConstantRange::OverflowResult::MayOverflow:
7155 return false;
7156 case ConstantRange::OverflowResult::NeverOverflows: {
7157 MatchInfo = [=](MachineIRBuilder &B) {
7158 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7159 B.buildConstant(Carry, 0);
7160 };
7161 return true;
7162 }
7163 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7164 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7165 MatchInfo = [=](MachineIRBuilder &B) {
7166 B.buildAdd(Dst, LHS, RHS);
7167 B.buildConstant(Carry, 1);
7168 };
7169 return true;
7170 }
7171 }
7172
7173 return false;
7174}
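// Illustrative sketch (not part of the original file): if known bits prove
// both operands of a G_UADDO fit in the low 31 bits of an s32,
// unsignedAddMayOverflow returns NeverOverflows and the addo above is
// rebuilt as a nuw G_ADD with a constant-0 carry; if the sum is known to
// always wrap, the carry becomes a constant 1 instead.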
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static Type * getTypeForLLT(LLT Ty, LLVMContext &C)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1096
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
int32_t exactLogBase2() const
Definition: APInt.h:1725
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:812
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
unsigned countl_one() const
Count the number of leading one bits.
Definition: APInt.h:1565
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition: InstrTypes.h:1255
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:1010
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:1023
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:1008
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:997
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:1017
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:1005
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:1000
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:1021
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:1019
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:1006
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:995
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1167
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
static bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyUDivByConst(MachineInstr &MI)
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops)
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
bool matchPtrAddZero(MachineInstr &MI)
}
bool matchAllExplicitUsesAreUndef(MachineInstr &MI)
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx)
Delete MI and replace all of its uses with its OpIdx-th operand.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUDivByConst(MachineInstr &MI)
Combine G_UDIV by constant into a multiply by magic constant.
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI)
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchShiftsTooBig(MachineInstr &MI)
Match shifts greater or equal to the bitwidth of the operation.
bool tryCombineCopy(MachineInstr &MI)
If MI is COPY, try to combine it.
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
bool matchUndefStore(MachineInstr &MI)
Return true if a G_STORE instruction MI is storing an undef value.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchRedundantSExtInReg(MachineInstr &MI)
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo)
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo)
Do constant FP folding when opportunities are exposed after MIR building.
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI)
void applyCommuteBinOpOperands(MachineInstr &MI)
bool matchBinOpSameVal(MachineInstr &MI)
Optimize (x op x) -> x.
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineCopy(MachineInstr &MI)
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx)
Return true if a G_SELECT instruction MI has a constant comparison.
void eraseInst(MachineInstr &MI)
Erase MI.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchAddSubSameReg(MachineInstr &MI, Register &Src)
Transform G_ADD(x, G_SUB(y, x)) to y.
void applyRotateOutOfRange(MachineInstr &MI)
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchRotateOutOfRange(MachineInstr &MI)
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops)
Replace MI with a concat_vectors with Ops.
const TargetLowering & getTargetLowering() const
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
void applyPtrAddZero(MachineInstr &MI)
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
void setRegBank(Register Reg, const RegisterBank *RegBank)
Set the register bank of Reg.
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement)
void replaceInstWithConstant(MachineInstr &MI, int64_t C)
Replace an instruction with a G_CONSTANT with value C.
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
Match ashr (shl x, C), C -> sext_inreg (C)
bool tryCombineExtendingLoads(MachineInstr &MI)
If MI is extend that consumes the result of a load, try to combine it.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applySDivByConst(MachineInstr &MI)
bool matchUndefSelectCmp(MachineInstr &MI)
Return true if a G_SELECT instruction MI has an undef comparison.
void replaceInstWithUndef(MachineInstr &MI)
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantOr(MachineInstr &MI, Register &Replacement)
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is undef.
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void replaceInstWithFConstant(MachineInstr &MI, double C)
Replace an instruction with a G_FCONSTANT with value C.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2)
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
Fold (shift (shift base, x), y) -> (shift base (x+y))
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*MULO x, 0) -> 0 + no carry out.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement)
Delete MI and replace all of its uses with Replacement.
bool matchFunnelShiftToRotate(MachineInstr &MI)
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
Combine inverting a result of a compare into the opposite cond code.
void applyCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is known to be a power of 2.
void applyCombineCopy(MachineInstr &MI)
void applyCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
bool matchAnyExplicitUseIsUndef(MachineInstr &MI)
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
bool matchSextTruncSextLoad(MachineInstr &MI)
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
GISelKnownBits * KB
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
MachineInstr * buildSDivUsingMul(MachineInstr &MI)
Given an G_SDIV MI expressing a signed divide by constant, return an expression that implements it by...
void applySDivByPow2(MachineInstr &MI)
void applyFunnelShiftConstantModulo(MachineInstr &MI)
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool isPreLegalize() const
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo)
Match (and (load x), mask) -> zextload x.
bool matchConstantOp(const MachineOperand &MOP, int64_t C)
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ands.
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg)
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool matchConstantFPOp(const MachineOperand &MOP, double C)
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
Return true if MI is a G_ADD which can be simplified to a G_SUB.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Optimize memcpy intrinsics et al, e.g.
bool matchSelectSameVal(MachineInstr &MI)
Optimize (cond ? x : x) -> x.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst)
Transform fp_instr(cst) to constant result of the fp operation.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo)
Try to reassociate to reassociate operands of a commutative binop.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool tryEmitMemcpyInline(MachineInstr &MI)
Emit loads and stores that perform the given memcpy.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info)
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData)
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo)
Constant fold G_FMA/G_FMAD.
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
bool isLegal(const LegalityQuery &Query) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine selects.
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo)
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg)
Transform anyext(trunc(x)) to x.
void applySimplifyURemByPow2(MachineInstr &MI)
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
MachineRegisterInfo & MRI
void applyUMulHToLShr(MachineInstr &MI)
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo)
Match expression trees of the form.
bool matchShuffleToExtract(MachineInstr &MI)
bool matchUndefShuffleVectorMask(MachineInstr &MI)
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
Transform a multiply by a power-of-2 value to a left shift.
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo)
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo)
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo)
Fold away a merge of an unmerge of the corresponding values.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI)
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx)
Checks if constant at ConstIdx is larger than MI 's bitwidth.
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
bool matchCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchDivByPow2(MachineInstr &MI, bool IsSigned)
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchUMulHToLShr(MachineInstr &MI)
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI dominates UseMI.
MachineInstr * buildUDivUsingMul(MachineInstr &MI)
Given an G_UDIV MI expressing a divide by constant, return an expression that implements it by multip...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg)
Transform zext(trunc(x)) to x.
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData)
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false)
const LegalizerInfo * LI
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
void applyShuffleToExtract(MachineInstr &MI)
MachineDominatorTree * MDT
bool matchSDivByConst(MachineInstr &MI)
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
const RegisterBankInfo * RBI
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
const TargetRegisterInfo * TRI
bool tryCombineShuffleVector(MachineInstr &MI)
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg)
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo)
GISelChangeObserver & Observer
bool matchCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Transform [asz]ext([asz]ext(x)) to [asz]ext x.
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Match sext_inreg(load p), imm -> sextload p.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ors.
void applyFunnelShiftToRotate(MachineInstr &MI)
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine addos.
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg)
Transform PtrToInt(IntToPtr(x)) to x.
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal)
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchCommuteConstantToRHS(MachineInstr &MI)
Match constant LHS ops that should be commuted.
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Replace MI with a series of instructions described in MatchInfo.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
MachineIRBuilder & Builder
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (and x, n), k -> ubfx x, pos, width.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate commutative binary operations like G_ADD.
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo)
Push a binary operator through a select on constants.
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is zero.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyUDivByPow2(MachineInstr &MI)
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
void applySextTruncSextLoad(MachineInstr &MI)
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
bool matchCommuteFPConstantToRHS(MachineInstr &MI)
Match constant LHS FP ops that should be commuted.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValue() const
Definition: Constants.h:312
const APFloat & getValueAPF() const
Definition: Constants.h:311
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This class represents a range of values.
Definition: ConstantRange.h:47
std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isBigEndian() const
Definition: DataLayout.h:239
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_FCMP.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
void finishedChangingAllUsesOfReg()
All instructions reported as changing by changingAllUsesOfReg() have finished being changed.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg)
All the instructions using the given register are being changed.
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
KnownBits getKnownBits(Register R)
APInt getKnownZeroes(Register R)
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents a G_IMPLICIT_DEF.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:182
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition: LowLevelType.h:178
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
bool isLegalOrCustom(const LegalityQuery &Query) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LLVMContext & getContext() const
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ Op0, Src0.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
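Putting several of these builders together, here is a hedged sketch of what a combine's apply step typically looks like. The function name applyURemByPow2 and the SplatVal parameter are illustrative, and the rewrite is only sound because the match step is assumed to have proven the divisor a power of two:

  // Rewrite (x urem 2^k) as (x & (2^k - 1)).
  static void applyURemByPow2(MachineInstr &MI, MachineIRBuilder &B,
                              const APInt &SplatVal) {
    B.setInstrAndDebugLoc(MI);                     // insert right before MI
    Register Dst = MI.getOperand(0).getReg();
    Register X   = MI.getOperand(1).getReg();
    LLT Ty = B.getMRI()->getType(Dst);
    auto Mask = B.buildConstant(Ty, SplatVal - 1); // 2^k - 1
    B.buildAnd(Dst, X, Mask);
    MI.eraseFromParent();                          // drop the original G_UREM
  }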
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
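When no typed convenience wrapper fits, an instruction can also be assembled operand by operand with addDef/addUse/addImm. A small sketch, assuming B is positioned and DstReg/SrcReg are existing generic virtual registers:

  // Hand-built equivalent of B.buildSExtInReg(DstReg, SrcReg, 8).
  MachineInstrBuilder MIB = B.buildInstr(TargetOpcode::G_SEXT_INREG)
                                .addDef(DstReg)   // result register
                                .addUse(SrcReg)   // value being extended
                                .addImm(8);       // sign-extend from the low 8 bits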
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:329
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:379
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:549
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:374
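These accessors are how the combiner walks an instruction it is inspecting. A brief sketch that collects the register operands of an arbitrary MI (variable names illustrative):

  // Collect every register operand of MI, defs and uses alike.
  SmallVector<Register, 4> Regs;
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    const MachineOperand &MO = MI.getOperand(I);
    if (MO.isReg())
      Regs.push_back(MO.getReg());
  }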
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
void setRegClassOrRegBank(Register Reg, const RegClassOrRegBank &RCOrRB)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
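A hedged sketch of the kind of def-chain query these MachineRegisterInfo entry points support, e.g. folding a same-type COPY; Reg is assumed to be a generic virtual register and MRI a mutable MachineRegisterInfo:

  // If Reg is merely a COPY of another virtual register of the same LLT,
  // and the register constraints permit it, forward the source directly.
  if (MachineInstr *Def = MRI.getVRegDef(Reg)) {
    if (Def->getOpcode() == TargetOpcode::COPY) {
      Register Src = Def->getOperand(1).getReg();
      if (Src.isVirtual() && MRI.getType(Src) == MRI.getType(Reg) &&
          canReplaceReg(Reg, Src, MRI))
        MRI.replaceRegWith(Reg, Src);
    }
  }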
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const
Given the generic extension instruction ExtMI, returns true if this extension is a likely candidate f...
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
operand_type_match m_Reg()
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(int64_t RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
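For illustration, a small sketch of how these combinators are driven through mi_match; Reg and MRI are assumed to be in scope, and the bound names X, Y and ShAmt are placeholders:

  // Match Reg against the commutative pattern (x << c) | y and bind the
  // pieces when it fits.
  Register X, Y;
  APInt ShAmt;
  if (mi_match(Reg, MRI,
               m_GOr(m_GShl(m_Reg(X), m_ICst(ShAmt)), m_Reg(Y)))) {
    // X, Y and ShAmt are now bound; a combine could e.g. form a funnel shift.
  }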
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition: Utils.cpp:1412
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:625
static double log2(double V)
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:438
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:293
std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1372
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1525
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:713
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to have exactly one bit set when defined.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1495
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1507
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:465
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1540
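As a usage sketch for the splat-aware predicate helper above (RHSReg is a placeholder for the operand being tested):

  // True when the scalar constant, splat, or every build-vector element of
  // RHSReg is a power of two.
  bool AllPow2 = matchUnaryPredicate(MRI, RHSReg, [](const Constant *C) {
    auto *CI = dyn_cast_or_null<ConstantInt>(C);
    return CI && CI->getValue().isPowerOf2();
  });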
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition: Utils.cpp:1572
std::function< void(MachineIRBuilder &)> BuildFnTy
std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:644
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1475
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition: Utils.cpp:199
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:246
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition: Utils.cpp:1405
std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:932
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition: Utils.cpp:427
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition: Utils.cpp:1597
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:413
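A typical look-through query, e.g. when a match needs a shift amount as a plain integer; ShiftReg is a placeholder and the value is assumed to fit in 64 bits:

  // Fails cleanly when ShiftReg is not rooted on a G_CONSTANT.
  if (auto MaybeCst = getIConstantVRegValWithLookThrough(ShiftReg, MRI)) {
    uint64_t ShiftAmt = MaybeCst->Value.getZExtValue();
    // ... use ShiftAmt ...
  }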
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value Register, folding away any copies.
Definition: Utils.cpp:446
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:472
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1390
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition: Utils.h:224
Extended Value Type.
Definition: ValueTypes.h:34
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
Definition: KnownBits.cpp:482
static std::optional< bool > ne(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_NE result.
Definition: KnownBits.cpp:490
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:530
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:496
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:536
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:512
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:516
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:540
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:520
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:506
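A hedged sketch of how these comparators feed icmp folding, assuming a GISelKnownBits analysis pointer KB and two compare operands LHSReg/RHSReg:

  // If the known bits of both sides already decide ICMP_ULT, the compare can
  // be replaced by a constant.
  KnownBits LHSKnown = KB->getKnownBits(LHSReg);
  KnownBits RHSKnown = KB->getKnownBits(RHSReg);
  if (std::optional<bool> Res = KnownBits::ult(LHSKnown, RHSKnown)) {
    bool IsAlwaysTrue = *Res;
    // ... materialise the boolean per the target's boolean contents ...
  }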
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
MachineInstr * MI
const RegisterBank * Bank
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
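To make the magic-number machinery concrete, a sketch of querying it for a 32-bit unsigned divide by 7; the field names Magic, IsAdd, PreShift and PostShift reflect my reading of DivisionByConstantInfo.h and should be double-checked against that header:

  // x udiv 7 can be lowered as a multiply-high plus shifts, roughly:
  //   q = umulh(x >> PreShift, Magic) >> PostShift, with an extra add-based
  //   fix-up step when IsAdd is set.
  APInt Divisor(32, 7);
  UnsignedDivisionByConstantInfo Magics =
      UnsignedDivisionByConstantInfo::get(Divisor);
  APInt MagicFactor   = Magics.Magic;     // multiplier fed to the umulh
  bool UseAddFixup    = Magics.IsAdd;     // whether the add-based fix-up is needed
  unsigned PreShift   = Magics.PreShift;  // shift applied to x before the mul
  unsigned PostShift  = Magics.PostShift; // shift applied after the mul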