CombinerHelper.cpp
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                               MachineIRBuilder &B, bool IsPreLegalize,
                               GISelKnownBits *KB, MachineDominatorTree *MDT,
                               const LegalizerInfo *LI)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
      MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
      RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
      TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
  (void)this->KB;
}
66
const TargetLowering &CombinerHelper::getTargetLowering() const {
  return *Builder.getMF().getSubtarget().getTargetLowering();
}
70
71/// \returns The little endian in-memory byte position of byte \p I in a
72/// \p ByteWidth bytes wide type.
73///
74/// E.g. Given a 4-byte type x, x[0] -> byte 0
75static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
76 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
77 return I;
78}
79
/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(V);
  auto Ctlz = MIB.buildCTLZ(Ty, V);
  auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
  return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
}
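// Worked example for buildLogBase2 (illustrative, not from the original
// source): for a 32-bit value V = 8, G_CTLZ yields 28, so the emitted
// G_SUB computes 31 - 28 = 3, i.e. log2(8).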
89
90/// \returns The big endian in-memory byte position of byte \p I in a
91/// \p ByteWidth bytes wide type.
92///
93/// E.g. Given a 4-byte type x, x[0] -> byte 3
94static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
95 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
96 return ByteWidth - I - 1;
97}
98
99/// Given a map from byte offsets in memory to indices in a load/store,
100/// determine if that map corresponds to a little or big endian byte pattern.
101///
102/// \param MemOffset2Idx maps memory offsets to address offsets.
103/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
104///
105/// \returns true if the map corresponds to a big endian byte pattern, false if
106/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
107///
108/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
109/// are as follows:
110///
111/// AddrOffset Little endian Big endian
112/// 0 0 3
113/// 1 1 2
114/// 2 2 1
115/// 3 3 0
static std::optional<bool>
isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
            int64_t LowestIdx) {
119 // Need at least two byte positions to decide on endianness.
120 unsigned Width = MemOffset2Idx.size();
121 if (Width < 2)
122 return std::nullopt;
123 bool BigEndian = true, LittleEndian = true;
124 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
125 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
126 if (MemOffsetAndIdx == MemOffset2Idx.end())
127 return std::nullopt;
128 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
129 assert(Idx >= 0 && "Expected non-negative byte offset?");
130 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
131 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
132 if (!BigEndian && !LittleEndian)
133 return std::nullopt;
134 }
135
136 assert((BigEndian != LittleEndian) &&
137 "Pattern cannot be both big and little endian!");
138 return BigEndian;
139}
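// Illustrative example for isBigEndian (not from the original source): with
// LowestIdx = 0, the map {0 -> 3, 1 -> 2, 2 -> 1, 3 -> 0} matches
// bigEndianByteAt for every offset and yields true, while the identity map
// {0 -> 0, 1 -> 1, 2 -> 2, 3 -> 3} yields false (little endian).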
140
142
bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }

bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
144 assert(LI && "Must have LegalizerInfo to query isLegal!");
145 return LI->getAction(Query).Action == LegalizeActions::Legal;
146}
147
bool CombinerHelper::isLegalOrBeforeLegalizer(
    const LegalityQuery &Query) const {
150 return isPreLegalize() || isLegal(Query);
151}
152
bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
  if (!Ty.isVector())
155 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
156 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
157 if (isPreLegalize())
158 return true;
159 LLT EltTy = Ty.getElementType();
160 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
161 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
162}
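// Illustrative example for isConstantLegalOrBeforeLegalizer (not from the
// original source): a <4 x s32> splat constant is materialized as
//   %c:_(s32) = G_CONSTANT i32 1
//   %v:_(<4 x s32>) = G_BUILD_VECTOR %c, %c, %c, %c
// so after legalization both G_BUILD_VECTOR and the scalar G_CONSTANT must be
// legal for the query to succeed.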
163
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                                    Register ToReg) const {
  Observer.changingAllUsesOfReg(MRI, FromReg);

  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(ToReg, FromReg);

  Observer.finishedChangingAllUsesOfReg();
}
175
void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
                                      MachineOperand &FromRegOp,
                                      Register ToReg) const {
179 assert(FromRegOp.getParent() && "Expected an operand in an MI");
180 Observer.changingInstr(*FromRegOp.getParent());
181
182 FromRegOp.setReg(ToReg);
183
184 Observer.changedInstr(*FromRegOp.getParent());
185}
186
void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
                                       unsigned ToOpcode) const {
189 Observer.changingInstr(FromMI);
190
191 FromMI.setDesc(Builder.getTII().get(ToOpcode));
192
193 Observer.changedInstr(FromMI);
194}
195
const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
  return RBI->getRegBank(Reg, MRI, *TRI);
198}
199
void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
  if (RegBank)
202 MRI.setRegBank(Reg, *RegBank);
203}
204
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return canReplaceReg(DstReg, SrcReg, MRI);
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, SrcReg);
}
225
bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
230 bool IsUndef = true;
231 MachineInstr *Undef = nullptr;
232
233 // Walk over all the operands of concat vectors and check if they are
234 // build_vector themselves or undef.
235 // Then collect their operands in Ops.
236 for (const MachineOperand &MO : MI.uses()) {
237 Register Reg = MO.getReg();
238 MachineInstr *Def = MRI.getVRegDef(Reg);
239 assert(Def && "Operand not defined");
240 if (!MRI.hasOneNonDBGUse(Reg))
241 return false;
242 switch (Def->getOpcode()) {
243 case TargetOpcode::G_BUILD_VECTOR:
244 IsUndef = false;
245 // Remember the operands of the build_vector to fold
246 // them into the yet-to-build flattened concat vectors.
247 for (const MachineOperand &BuildVecMO : Def->uses())
248 Ops.push_back(BuildVecMO.getReg());
249 break;
250 case TargetOpcode::G_IMPLICIT_DEF: {
251 LLT OpType = MRI.getType(Reg);
252 // Keep one undef value for all the undef operands.
253 if (!Undef) {
254 Builder.setInsertPt(*MI.getParent(), MI);
255 Undef = Builder.buildUndef(OpType.getScalarType());
256 }
257 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
258 OpType.getScalarType() &&
259 "All undefs should have the same type");
260 // Break the undef vector in as many scalar elements as needed
261 // for the flattening.
262 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
263 EltIdx != EltEnd; ++EltIdx)
264 Ops.push_back(Undef->getOperand(0).getReg());
265 break;
266 }
267 default:
268 return false;
269 }
270 }
271
272 // Check if the combine is illegal
273 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
276 return false;
277 }
278
279 if (IsUndef)
280 Ops.clear();
281
282 return true;
283}
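// Illustrative MIR for the concat_vectors flattening matched above (not from
// the original source):
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x, %y
//   %b:_(<2 x s32>) = G_IMPLICIT_DEF
//   %v:_(<4 x s32>) = G_CONCAT_VECTORS %a, %b
// becomes
//   %u:_(s32) = G_IMPLICIT_DEF
//   %v:_(<4 x s32>) = G_BUILD_VECTOR %x, %y, %u, %u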
void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
                                               ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
287 // Generate the flattened build_vector.
288 Register DstReg = MI.getOperand(0).getReg();
289 Builder.setInsertPt(*MI.getParent(), MI);
290 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
291
  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef. Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
295 // clean that up. For now, given we already gather this information
296 // in matchCombineConcatVectors, just save compile time and issue the
297 // right thing.
298 if (Ops.empty())
299 Builder.buildUndef(NewDstReg);
300 else
301 Builder.buildBuildVector(NewDstReg, Ops);
302 MI.eraseFromParent();
303 replaceRegWith(MRI, DstReg, NewDstReg);
304}
305
bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
309 auto ConcatMI1 =
310 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
311 auto ConcatMI2 =
312 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
313 if (!ConcatMI1 || !ConcatMI2)
314 return false;
315
316 // Check that the sources of the Concat instructions have the same type
317 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
318 MRI.getType(ConcatMI2->getSourceReg(0)))
319 return false;
320
321 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
322 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
323 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
324 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
325 // Check if the index takes a whole source register from G_CONCAT_VECTORS
326 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
327 if (Mask[i] == -1) {
328 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
329 if (i + j >= Mask.size())
330 return false;
331 if (Mask[i + j] != -1)
332 return false;
333 }
      if (!isLegalOrBeforeLegalizer(
              {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
336 return false;
337 Ops.push_back(0);
338 } else if (Mask[i] % ConcatSrcNumElt == 0) {
339 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
340 if (i + j >= Mask.size())
341 return false;
342 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
343 return false;
344 }
345 // Retrieve the source register from its respective G_CONCAT_VECTORS
346 // instruction
347 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
348 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
349 } else {
350 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
351 ConcatMI1->getNumSources()));
352 }
353 } else {
354 return false;
355 }
356 }
357
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_CONCAT_VECTORS,
           {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
361 return false;
362
363 return !Ops.empty();
364}
365
void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  LLT SrcTy = MRI.getType(Ops[0]);
369 Register UndefReg = 0;
370
371 for (unsigned i = 0; i < Ops.size(); i++) {
372 if (Ops[i] == 0) {
373 if (UndefReg == 0)
374 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
375 Ops[i] = UndefReg;
376 }
377 }
378
379 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
380 MI.eraseFromParent();
381}
382
bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
388 }
389 return false;
390}
391
bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
395 "Invalid instruction kind");
396 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
397 Register Src1 = MI.getOperand(1).getReg();
398 LLT SrcType = MRI.getType(Src1);
399 // As bizarre as it may look, shuffle vector can actually produce
400 // scalar! This is because at the IR level a <1 x ty> shuffle
401 // vector is perfectly valid.
402 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
403 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
404
405 // If the resulting vector is smaller than the size of the source
406 // vectors being concatenated, we won't be able to replace the
407 // shuffle vector into a concat_vectors.
408 //
409 // Note: We may still be able to produce a concat_vectors fed by
410 // extract_vector_elt and so on. It is less clear that would
411 // be better though, so don't bother for now.
412 //
413 // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
415 // work only if the source and destination have the same size. But
416 // that's covered by the next condition.
417 //
418 // TODO: If the size between the source and destination don't match
419 // we could still emit an extract vector element in that case.
420 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
421 return false;
422
423 // Check that the shuffle mask can be broken evenly between the
424 // different sources.
425 if (DstNumElts % SrcNumElts != 0)
426 return false;
427
428 // Mask length is a multiple of the source vector length.
429 // Check if the shuffle is some kind of concatenation of the input
430 // vectors.
431 unsigned NumConcat = DstNumElts / SrcNumElts;
432 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
433 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
434 for (unsigned i = 0; i != DstNumElts; ++i) {
435 int Idx = Mask[i];
436 // Undef value.
437 if (Idx < 0)
438 continue;
439 // Ensure the indices in each SrcType sized piece are sequential and that
440 // the same source is used for the whole piece.
441 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
442 (ConcatSrcs[i / SrcNumElts] >= 0 &&
443 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
444 return false;
445 // Remember which source this index came from.
446 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
447 }
448
449 // The shuffle is concatenating multiple vectors together.
450 // Collect the different operands for that.
451 Register UndefReg;
452 Register Src2 = MI.getOperand(2).getReg();
453 for (auto Src : ConcatSrcs) {
454 if (Src < 0) {
455 if (!UndefReg) {
456 Builder.setInsertPt(*MI.getParent(), MI);
457 UndefReg = Builder.buildUndef(SrcType).getReg(0);
458 }
459 Ops.push_back(UndefReg);
460 } else if (Src == 0)
461 Ops.push_back(Src1);
462 else
463 Ops.push_back(Src2);
464 }
465 return true;
466}
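// Illustrative MIR for matchCombineShuffleVector (not from the original
// source): with %a and %b both <2 x s32>,
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %a, %b, shufflemask(0, 1, 2, 3)
// collects Ops = {%a, %b} and is rewritten by the apply below into
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %a, %b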
467
void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                               const ArrayRef<Register> Ops) {
470 Register DstReg = MI.getOperand(0).getReg();
471 Builder.setInsertPt(*MI.getParent(), MI);
472 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
473
474 if (Ops.size() == 1)
475 Builder.buildCopy(NewDstReg, Ops[0]);
476 else
477 Builder.buildMergeLikeInstr(NewDstReg, Ops);
478
479 MI.eraseFromParent();
480 replaceRegWith(MRI, DstReg, NewDstReg);
481}
482
bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
485 "Invalid instruction kind");
486
487 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
488 return Mask.size() == 1;
489}
490
void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
493 Builder.setInsertPt(*MI.getParent(), MI);
494
495 int I = MI.getOperand(3).getShuffleMask()[0];
496 Register Src1 = MI.getOperand(1).getReg();
497 LLT Src1Ty = MRI.getType(Src1);
498 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
499 Register SrcReg;
500 if (I >= Src1NumElts) {
501 SrcReg = MI.getOperand(2).getReg();
502 I -= Src1NumElts;
503 } else if (I >= 0)
504 SrcReg = Src1;
505
506 if (I < 0)
507 Builder.buildUndef(DstReg);
508 else if (!MRI.getType(SrcReg).isVector())
509 Builder.buildCopy(DstReg, SrcReg);
  else
    Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);

513 MI.eraseFromParent();
514}
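// Illustrative MIR for the single-element shuffle handled above (not from the
// original source):
//   %d:_(s32) = G_SHUFFLE_VECTOR %a:_(<2 x s32>), %b:_(<2 x s32>),
//               shufflemask(3)
// picks element 1 of %b, so it becomes an extract of that element (or an
// undef / plain copy in the cases handled earlier in the function).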
515
516namespace {
517
518/// Select a preference between two uses. CurrentUse is the current preference
519/// while *ForCandidate is attributes of the candidate under consideration.
520PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
521 PreferredTuple &CurrentUse,
522 const LLT TyForCandidate,
523 unsigned OpcodeForCandidate,
524 MachineInstr *MIForCandidate) {
525 if (!CurrentUse.Ty.isValid()) {
526 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
527 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
528 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
529 return CurrentUse;
530 }
531
532 // We permit the extend to hoist through basic blocks but this is only
533 // sensible if the target has extending loads. If you end up lowering back
534 // into a load and extend during the legalizer then the end result is
535 // hoisting the extend up to the load.
536
537 // Prefer defined extensions to undefined extensions as these are more
538 // likely to reduce the number of instructions.
539 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
540 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
541 return CurrentUse;
542 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
543 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
544 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
545
546 // Prefer sign extensions to zero extensions as sign-extensions tend to be
547 // more expensive. Don't do this if the load is already a zero-extend load
548 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
549 // later.
550 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
551 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
552 OpcodeForCandidate == TargetOpcode::G_ZEXT)
553 return CurrentUse;
554 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
555 OpcodeForCandidate == TargetOpcode::G_SEXT)
556 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
557 }
558
559 // This is potentially target specific. We've chosen the largest type
560 // because G_TRUNC is usually free. One potential catch with this is that
561 // some targets have a reduced number of larger registers than smaller
562 // registers and this choice potentially increases the live-range for the
563 // larger value.
564 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
565 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
566 }
567 return CurrentUse;
568}
569
570/// Find a suitable place to insert some instructions and insert them. This
571/// function accounts for special cases like inserting before a PHI node.
572/// The current strategy for inserting before PHI's is to duplicate the
573/// instructions for each predecessor. However, while that's ok for G_TRUNC
574/// on most targets since it generally requires no code, other targets/cases may
575/// want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
581 MachineInstr &UseMI = *UseMO.getParent();
582
583 MachineBasicBlock *InsertBB = UseMI.getParent();
584
585 // If the use is a PHI then we want the predecessor block instead.
586 if (UseMI.isPHI()) {
587 MachineOperand *PredBB = std::next(&UseMO);
588 InsertBB = PredBB->getMBB();
589 }
590
591 // If the block is the same block as the def then we want to insert just after
592 // the def instead of at the start of the block.
593 if (InsertBB == DefMI.getParent()) {
    MachineBasicBlock::iterator InsertPt(&DefMI);
    Inserter(InsertBB, std::next(InsertPt), UseMO);
596 return;
597 }
598
599 // Otherwise we want the start of the BB
600 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
601}
602} // end anonymous namespace
603
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  PreferredTuple Preferred;
606 if (matchCombineExtendingLoads(MI, Preferred)) {
607 applyCombineExtendingLoads(MI, Preferred);
608 return true;
609 }
610 return false;
611}
612
613static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
614 unsigned CandidateLoadOpc;
615 switch (ExtOpc) {
616 case TargetOpcode::G_ANYEXT:
617 CandidateLoadOpc = TargetOpcode::G_LOAD;
618 break;
619 case TargetOpcode::G_SEXT:
620 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
621 break;
622 case TargetOpcode::G_ZEXT:
623 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
624 break;
625 default:
626 llvm_unreachable("Unexpected extend opc");
627 }
628 return CandidateLoadOpc;
629}
630
bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
633 // We match the loads and follow the uses to the extend instead of matching
634 // the extends and following the def to the load. This is because the load
635 // must remain in the same position for correctness (unless we also add code
636 // to find a safe place to sink it) whereas the extend is freely movable.
637 // It also prevents us from duplicating the load for the volatile case or just
638 // for performance.
639 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
640 if (!LoadMI)
641 return false;
642
643 Register LoadReg = LoadMI->getDstReg();
644
645 LLT LoadValueTy = MRI.getType(LoadReg);
646 if (!LoadValueTy.isScalar())
647 return false;
648
649 // Most architectures are going to legalize <s8 loads into at least a 1 byte
650 // load, and the MMOs can only describe memory accesses in multiples of bytes.
651 // If we try to perform extload combining on those, we can end up with
652 // %a(s8) = extload %ptr (load 1 byte from %ptr)
653 // ... which is an illegal extload instruction.
654 if (LoadValueTy.getSizeInBits() < 8)
655 return false;
656
657 // For non power-of-2 types, they will very likely be legalized into multiple
658 // loads. Don't bother trying to match them into extending loads.
659 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
660 return false;
661
662 // Find the preferred type aside from the any-extends (unless it's the only
663 // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
665 // relative type sizes. At the same time, pick an extend to use based on the
666 // extend involved in the chosen type.
667 unsigned PreferredOpcode =
668 isa<GLoad>(&MI)
669 ? TargetOpcode::G_ANYEXT
670 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
671 Preferred = {LLT(), PreferredOpcode, nullptr};
672 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
673 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
674 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
675 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
676 const auto &MMO = LoadMI->getMMO();
677 // Don't do anything for atomics.
678 if (MMO.isAtomic())
679 continue;
680 // Check for legality.
681 if (!isPreLegalize()) {
682 LegalityQuery::MemDesc MMDesc(MMO);
683 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
684 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
685 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
686 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
687 .Action != LegalizeActions::Legal)
688 continue;
689 }
690 Preferred = ChoosePreferredUse(MI, Preferred,
691 MRI.getType(UseMI.getOperand(0).getReg()),
692 UseMI.getOpcode(), &UseMI);
693 }
694 }
695
696 // There were no extends
697 if (!Preferred.MI)
698 return false;
  // It should be impossible to choose an extend without selecting a different
700 // type since by definition the result of an extend is larger.
701 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
702
703 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
704 return true;
705}
706
void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
709 // Rewrite the load to the chosen extending load.
710 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
711
712 // Inserter to insert a truncate back to the original type at a given point
713 // with some basic CSE to limit truncate duplication to one per BB.
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
716 MachineBasicBlock::iterator InsertBefore,
717 MachineOperand &UseMO) {
718 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
719 if (PreviouslyEmitted) {
      Observer.changingInstr(*UseMO.getParent());
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
724 }
725
726 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
727 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
728 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
729 EmittedInsns[InsertIntoBB] = NewMI;
730 replaceRegOpWith(MRI, UseMO, NewDstReg);
731 };
732
  Observer.changingInstr(MI);
  unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
735 MI.setDesc(Builder.getTII().get(LoadOpc));
736
737 // Rewrite all the uses to fix up the types.
738 auto &LoadValue = MI.getOperand(0);
  SmallVector<MachineOperand *, 4> Uses;
  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
741 Uses.push_back(&UseMO);
742
743 for (auto *UseMO : Uses) {
744 MachineInstr *UseMI = UseMO->getParent();
745
746 // If the extend is compatible with the preferred extend then we should fix
747 // up the type and extend so that it uses the preferred use.
748 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
749 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
750 Register UseDstReg = UseMI->getOperand(0).getReg();
751 MachineOperand &UseSrcMO = UseMI->getOperand(1);
752 const LLT UseDstTy = MRI.getType(UseDstReg);
753 if (UseDstReg != ChosenDstReg) {
754 if (Preferred.Ty == UseDstTy) {
755 // If the use has the same type as the preferred use, then merge
756 // the vregs and erase the extend. For example:
757 // %1:_(s8) = G_LOAD ...
758 // %2:_(s32) = G_SEXT %1(s8)
759 // %3:_(s32) = G_ANYEXT %1(s8)
760 // ... = ... %3(s32)
761 // rewrites to:
762 // %2:_(s32) = G_SEXTLOAD ...
763 // ... = ... %2(s32)
764 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
          Observer.erasingInstr(*UseMO->getParent());
          UseMO->getParent()->eraseFromParent();
767 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
768 // If the preferred size is smaller, then keep the extend but extend
769 // from the result of the extending load. For example:
770 // %1:_(s8) = G_LOAD ...
771 // %2:_(s32) = G_SEXT %1(s8)
772 // %3:_(s64) = G_ANYEXT %1(s8)
773 // ... = ... %3(s64)
774 /// rewrites to:
775 // %2:_(s32) = G_SEXTLOAD ...
776 // %3:_(s64) = G_ANYEXT %2:_(s32)
777 // ... = ... %3(s64)
778 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
779 } else {
780 // If the preferred size is large, then insert a truncate. For
781 // example:
782 // %1:_(s8) = G_LOAD ...
783 // %2:_(s64) = G_SEXT %1(s8)
784 // %3:_(s32) = G_ZEXT %1(s8)
785 // ... = ... %3(s32)
786 /// rewrites to:
787 // %2:_(s64) = G_SEXTLOAD ...
788 // %4:_(s8) = G_TRUNC %2:_(s32)
789 // %3:_(s64) = G_ZEXT %2:_(s8)
790 // ... = ... %3(s64)
791 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
792 InsertTruncAt);
793 }
794 continue;
795 }
796 // The use is (one of) the uses of the preferred use we chose earlier.
797 // We're going to update the load to def this value later so just erase
798 // the old extend.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
801 continue;
802 }
803
804 // The use isn't an extend. Truncate back to the type we originally loaded.
805 // This is free on many targets.
806 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
807 }
808
809 MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}
812
bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
                                                 BuildFnTy &MatchInfo) {
815 assert(MI.getOpcode() == TargetOpcode::G_AND);
816
817 // If we have the following code:
818 // %mask = G_CONSTANT 255
819 // %ld = G_LOAD %ptr, (load s16)
820 // %and = G_AND %ld, %mask
821 //
822 // Try to fold it into
823 // %ld = G_ZEXTLOAD %ptr, (load s8)
824
825 Register Dst = MI.getOperand(0).getReg();
826 if (MRI.getType(Dst).isVector())
827 return false;
828
829 auto MaybeMask =
830 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
831 if (!MaybeMask)
832 return false;
833
834 APInt MaskVal = MaybeMask->Value;
835
836 if (!MaskVal.isMask())
837 return false;
838
839 Register SrcReg = MI.getOperand(1).getReg();
840 // Don't use getOpcodeDef() here since intermediate instructions may have
841 // multiple users.
842 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
843 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
844 return false;
845
846 Register LoadReg = LoadMI->getDstReg();
847 LLT RegTy = MRI.getType(LoadReg);
848 Register PtrReg = LoadMI->getPointerReg();
849 unsigned RegSize = RegTy.getSizeInBits();
850 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
851 unsigned MaskSizeBits = MaskVal.countr_one();
852
853 // The mask may not be larger than the in-memory type, as it might cover sign
854 // extended bits
855 if (MaskSizeBits > LoadSizeBits.getValue())
856 return false;
857
858 // If the mask covers the whole destination register, there's nothing to
859 // extend
860 if (MaskSizeBits >= RegSize)
861 return false;
862
863 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
864 // at least byte loads. Avoid creating such loads here
865 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
866 return false;
867
868 const MachineMemOperand &MMO = LoadMI->getMMO();
869 LegalityQuery::MemDesc MemDesc(MMO);
870
871 // Don't modify the memory access size if this is atomic/volatile, but we can
872 // still adjust the opcode to indicate the high bit behavior.
873 if (LoadMI->isSimple())
874 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
875 else if (LoadSizeBits.getValue() > MaskSizeBits ||
876 LoadSizeBits.getValue() == RegSize)
877 return false;
878
879 // TODO: Could check if it's legal with the reduced or original memory size.
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
882 return false;
883
884 MatchInfo = [=](MachineIRBuilder &B) {
885 B.setInstrAndDebugLoc(*LoadMI);
886 auto &MF = B.getMF();
887 auto PtrInfo = MMO.getPointerInfo();
888 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
889 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
890 LoadMI->eraseFromParent();
891 };
892 return true;
893}
894
bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
                                   const MachineInstr &UseMI) {
897 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
898 "shouldn't consider debug uses");
899 assert(DefMI.getParent() == UseMI.getParent());
900 if (&DefMI == &UseMI)
901 return true;
902 const MachineBasicBlock &MBB = *DefMI.getParent();
903 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
904 return &MI == &DefMI || &MI == &UseMI;
905 });
906 if (DefOrUse == MBB.end())
907 llvm_unreachable("Block must contain both DefMI and UseMI!");
908 return &*DefOrUse == &DefMI;
909}
910
bool CombinerHelper::dominates(const MachineInstr &DefMI,
                               const MachineInstr &UseMI) {
913 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
914 "shouldn't consider debug uses");
915 if (MDT)
916 return MDT->dominates(&DefMI, &UseMI);
917 else if (DefMI.getParent() != UseMI.getParent())
918 return false;
919
920 return isPredecessor(DefMI, UseMI);
921}
922
bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
925 Register SrcReg = MI.getOperand(1).getReg();
926 Register LoadUser = SrcReg;
927
928 if (MRI.getType(SrcReg).isVector())
929 return false;
930
931 Register TruncSrc;
932 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
933 LoadUser = TruncSrc;
934
935 uint64_t SizeInBits = MI.getOperand(2).getImm();
936 // If the source is a G_SEXTLOAD from the same bit width, then we don't
937 // need any extend at all, just a truncate.
938 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
939 // If truncating more than the original extended value, abort.
940 auto LoadSizeBits = LoadMI->getMemSizeInBits();
941 if (TruncSrc &&
942 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
943 return false;
944 if (LoadSizeBits == SizeInBits)
945 return true;
946 }
947 return false;
948}
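// Illustrative MIR for matchSextTruncSextLoad (not from the original source):
//   %x:_(s32) = G_SEXTLOAD %ptr (load 1)
//   %t:_(s16) = G_TRUNC %x
//   %d:_(s16) = G_SEXT_INREG %t, 8
// The loaded value is already sign-extended from 8 bits, so the apply below
// replaces %d with a plain copy of %t.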
949
void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
952 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
953 MI.eraseFromParent();
954}
955
bool CombinerHelper::matchSextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
958 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
959
960 Register DstReg = MI.getOperand(0).getReg();
961 LLT RegTy = MRI.getType(DstReg);
962
963 // Only supports scalars for now.
964 if (RegTy.isVector())
965 return false;
966
967 Register SrcReg = MI.getOperand(1).getReg();
968 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
969 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
970 return false;
971
972 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
973
974 // If the sign extend extends from a narrower width than the load's width,
975 // then we can narrow the load width when we combine to a G_SEXTLOAD.
976 // Avoid widening the load at all.
977 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
978
979 // Don't generate G_SEXTLOADs with a < 1 byte width.
980 if (NewSizeBits < 8)
981 return false;
982 // Don't bother creating a non-power-2 sextload, it will likely be broken up
983 // anyway for most targets.
984 if (!isPowerOf2_32(NewSizeBits))
985 return false;
986
987 const MachineMemOperand &MMO = LoadDef->getMMO();
988 LegalityQuery::MemDesc MMDesc(MMO);
989
990 // Don't modify the memory access size if this is atomic/volatile, but we can
991 // still adjust the opcode to indicate the high bit behavior.
992 if (LoadDef->isSimple())
993 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
994 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
995 return false;
996
997 // TODO: Could check if it's legal with the reduced or original memory size.
998 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
999 {MRI.getType(LoadDef->getDstReg()),
1000 MRI.getType(LoadDef->getPointerReg())},
1001 {MMDesc}}))
1002 return false;
1003
1004 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1005 return true;
1006}
1007
void CombinerHelper::applySextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1010 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1011 Register LoadReg;
1012 unsigned ScalarSizeBits;
1013 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1014 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1015
1016 // If we have the following:
1017 // %ld = G_LOAD %ptr, (load 2)
1018 // %ext = G_SEXT_INREG %ld, 8
1019 // ==>
1020 // %ld = G_SEXTLOAD %ptr (load 1)
1021
1022 auto &MMO = LoadDef->getMMO();
1023 Builder.setInstrAndDebugLoc(*LoadDef);
1024 auto &MF = Builder.getMF();
1025 auto PtrInfo = MMO.getPointerInfo();
1026 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1027 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1028 LoadDef->getPointerReg(), *NewMMO);
1029 MI.eraseFromParent();
1030}
1031
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}
1038
/// Return true if 'MI' is a load or a store that may fold its address
/// operand into the load / store addressing mode.
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
                                    MachineRegisterInfo &MRI) {
  TargetLoweringBase::AddrMode AM;
  auto *MF = MI->getMF();
1045 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1046 if (!Addr)
1047 return false;
1048
1049 AM.HasBaseReg = true;
1050 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1051 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1052 else
1053 AM.Scale = 1; // [reg +/- reg]
1054
1055 return TLI.isLegalAddressingMode(
1056 MF->getDataLayout(), AM,
1057 getTypeForLLT(MI->getMMO().getMemoryType(),
1058 MF->getFunction().getContext()),
1059 MI->getMMO().getAddrSpace());
1060}
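// Illustrative example for canFoldInAddressingMode (not from the original
// source): on a target where [reg + imm] addressing is legal, a G_PTR_ADD of
// a base pointer with a constant offset that feeds another load is reported
// as foldable, and the indexed load/store combines below use this to skip
// transformations that would not remove the address computation.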
1061
1062static unsigned getIndexedOpc(unsigned LdStOpc) {
1063 switch (LdStOpc) {
1064 case TargetOpcode::G_LOAD:
1065 return TargetOpcode::G_INDEXED_LOAD;
1066 case TargetOpcode::G_STORE:
1067 return TargetOpcode::G_INDEXED_STORE;
1068 case TargetOpcode::G_ZEXTLOAD:
1069 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1070 case TargetOpcode::G_SEXTLOAD:
1071 return TargetOpcode::G_INDEXED_SEXTLOAD;
1072 default:
1073 llvm_unreachable("Unexpected opcode");
1074 }
1075}
1076
1077bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1078 // Check for legality.
1079 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1080 LLT Ty = MRI.getType(LdSt.getReg(0));
1081 LLT MemTy = LdSt.getMMO().getMemoryType();
  SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
      {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
1084 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1085 SmallVector<LLT> OpTys;
1086 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1087 OpTys = {PtrTy, Ty, Ty};
1088 else
1089 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1090
1091 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1092 return isLegal(Q);
1093}
1094
static cl::opt<unsigned> PostIndexUseThreshold(
    "post-index-use-threshold", cl::Hidden, cl::init(32),
1097 cl::desc("Number of uses of a base pointer to check before it is no longer "
1098 "considered for post-indexing."));
1099
1100bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
                                            Register &Base, Register &Offset,
                                            bool &RematOffset) {
1103 // We're looking for the following pattern, for either load or store:
1104 // %baseptr:_(p0) = ...
1105 // G_STORE %val(s64), %baseptr(p0)
1106 // %offset:_(s64) = G_CONSTANT i64 -256
1107 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1108 const auto &TLI = getTargetLowering();
1109
1110 Register Ptr = LdSt.getPointerReg();
1111 // If the store is the only use, don't bother.
1112 if (MRI.hasOneNonDBGUse(Ptr))
1113 return false;
1114
1115 if (!isIndexedLoadStoreLegal(LdSt))
1116 return false;
1117
1118 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1119 return false;
1120
1121 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1122 auto *PtrDef = MRI.getVRegDef(Ptr);
1123
1124 unsigned NumUsesChecked = 0;
1125 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1126 if (++NumUsesChecked > PostIndexUseThreshold)
1127 return false; // Try to avoid exploding compile time.
1128
1129 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1130 // The use itself might be dead. This can happen during combines if DCE
1131 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1132 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1133 continue;
1134
    // Check that the user of this isn't the store, otherwise we'd be
    // generating an indexed store defining its own use.
1137 if (StoredValDef == &Use)
1138 continue;
1139
1140 Offset = PtrAdd->getOffsetReg();
1141 if (!ForceLegalIndexing &&
1142 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1143 /*IsPre*/ false, MRI))
1144 continue;
1145
1146 // Make sure the offset calculation is before the potentially indexed op.
1147 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1148 RematOffset = false;
1149 if (!dominates(*OffsetDef, LdSt)) {
1150 // If the offset however is just a G_CONSTANT, we can always just
1151 // rematerialize it where we need it.
1152 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1153 continue;
1154 RematOffset = true;
1155 }
1156
1157 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1158 if (&BasePtrUse == PtrDef)
1159 continue;
1160
1161 // If the user is a later load/store that can be post-indexed, then don't
1162 // combine this one.
1163 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1164 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1165 dominates(LdSt, *BasePtrLdSt) &&
1166 isIndexedLoadStoreLegal(*BasePtrLdSt))
1167 return false;
1168
1169 // Now we're looking for the key G_PTR_ADD instruction, which contains
1170 // the offset add that we want to fold.
1171 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1172 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1173 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1174 // If the use is in a different block, then we may produce worse code
1175 // due to the extra register pressure.
1176 if (BaseUseUse.getParent() != LdSt.getParent())
1177 return false;
1178
1179 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1180 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1181 return false;
1182 }
1183 if (!dominates(LdSt, BasePtrUse))
          return false; // All uses must be dominated by the load/store.
1185 }
1186 }
1187
1188 Addr = PtrAdd->getReg(0);
1189 Base = PtrAdd->getBaseReg();
1190 return true;
1191 }
1192
1193 return false;
1194}
1195
1196bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
                                           Register &Base, Register &Offset) {
  auto &MF = *LdSt.getParent()->getParent();
1199 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1200
1201 Addr = LdSt.getPointerReg();
  if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
      MRI.hasOneNonDBGUse(Addr))
    return false;
1205
1206 if (!ForceLegalIndexing &&
1207 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1208 return false;
1209
1210 if (!isIndexedLoadStoreLegal(LdSt))
1211 return false;
1212
  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1215 return false;
1216
1217 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1218 // Would require a copy.
1219 if (Base == St->getValueReg())
1220 return false;
1221
1222 // We're expecting one use of Addr in MI, but it could also be the
1223 // value stored, which isn't actually dominated by the instruction.
1224 if (St->getValueReg() == Addr)
1225 return false;
1226 }
1227
1228 // Avoid increasing cross-block register pressure.
1229 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1230 if (AddrUse.getParent() != LdSt.getParent())
1231 return false;
1232
1233 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1234 // That might allow us to end base's liveness here by adjusting the constant.
1235 bool RealUse = false;
1236 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1237 if (!dominates(LdSt, AddrUse))
      return false; // All uses must be dominated by the load/store.
1239
1240 // If Ptr may be folded in addressing mode of other use, then it's
1241 // not profitable to do this transformation.
1242 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1243 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1244 RealUse = true;
1245 } else {
1246 RealUse = true;
1247 }
1248 }
1249 return RealUse;
1250}
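// Illustrative MIR for a pre-index candidate (not from the original source):
//   %new:_(p0) = G_PTR_ADD %base, %offset
//   G_STORE %val(s32), %new(p0)
// can become a G_INDEXED_STORE that stores through %base + %offset and also
// defines %new as the written-back address.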
1251
bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
                                                     BuildFnTy &MatchInfo) {
1254 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1255
1256 // Check if there is a load that defines the vector being extracted from.
1257 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1258 if (!LoadMI)
1259 return false;
1260
1261 Register Vector = MI.getOperand(1).getReg();
1262 LLT VecEltTy = MRI.getType(Vector).getElementType();
1263
1264 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1265
1266 // Checking whether we should reduce the load width.
  if (!MRI.hasOneNonDBGUse(Vector))
    return false;
1269
1270 // Check if the defining load is simple.
1271 if (!LoadMI->isSimple())
1272 return false;
1273
1274 // If the vector element type is not a multiple of a byte then we are unable
1275 // to correctly compute an address to load only the extracted element as a
1276 // scalar.
1277 if (!VecEltTy.isByteSized())
1278 return false;
1279
1280 // Check for load fold barriers between the extraction and the load.
1281 if (MI.getParent() != LoadMI->getParent())
1282 return false;
1283 const unsigned MaxIter = 20;
1284 unsigned Iter = 0;
1285 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1286 if (II->isLoadFoldBarrier())
1287 return false;
1288 if (Iter++ == MaxIter)
1289 return false;
1290 }
1291
1292 // Check if the new load that we are going to create is legal
1293 // if we are in the post-legalization phase.
1294 MachineMemOperand MMO = LoadMI->getMMO();
1295 Align Alignment = MMO.getAlign();
1296 MachinePointerInfo PtrInfo;
  uint64_t Offset;

1299 // Finding the appropriate PtrInfo if offset is a known constant.
1300 // This is required to create the memory operand for the narrowed load.
1301 // This machine memory operand object helps us infer about legality
1302 // before we proceed to combine the instruction.
1303 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1304 int Elt = CVal->getZExtValue();
1305 // FIXME: should be (ABI size)*Elt.
1306 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1307 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1308 } else {
1309 // Discard the pointer info except the address space because the memory
1310 // operand can't represent this new access since the offset is variable.
1311 Offset = VecEltTy.getSizeInBits() / 8;
    PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
  }
1314
1315 Alignment = commonAlignment(Alignment, Offset);
1316
1317 Register VecPtr = LoadMI->getPointerReg();
1318 LLT PtrTy = MRI.getType(VecPtr);
1319
1320 MachineFunction &MF = *MI.getMF();
1321 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1322
1323 LegalityQuery::MemDesc MMDesc(*NewMMO);
1324
1325 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1326
  if (!isLegalOrBeforeLegalizer(Q))
    return false;
1329
1330 // Load must be allowed and fast on the target.
  LLVMContext &C = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
1333 unsigned Fast = 0;
1334 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1335 &Fast) ||
1336 !Fast)
1337 return false;
1338
1339 Register Result = MI.getOperand(0).getReg();
1340 Register Index = MI.getOperand(2).getReg();
1341
1342 MatchInfo = [=](MachineIRBuilder &B) {
1343 GISelObserverWrapper DummyObserver;
1344 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1345 //// Get pointer to the vector element.
1346 Register finalPtr = Helper.getVectorElementPointer(
1347 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1348 Index);
1349 // New G_LOAD instruction.
1350 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1351 // Remove original GLOAD instruction.
1352 LoadMI->eraseFromParent();
1353 };
1354
1355 return true;
1356}
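// Illustrative MIR for the extracted-vector-load combine matched above (not
// from the original source):
//   %v:_(<4 x s32>) = G_LOAD %p(p0) (load 16)
//   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v, %idx
// becomes a single narrow load through a pointer to the addressed element:
//   %e:_(s32) = G_LOAD %elt_ptr(p0) (load 4)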
1357
bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  auto &LdSt = cast<GLoadStore>(MI);
1361
1362 if (LdSt.isAtomic())
1363 return false;
1364
1365 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1366 MatchInfo.Offset);
1367 if (!MatchInfo.IsPre &&
1368 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1369 MatchInfo.Offset, MatchInfo.RematOffset))
1370 return false;
1371
1372 return true;
1373}
1374
void CombinerHelper::applyCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1378 unsigned Opcode = MI.getOpcode();
1379 bool IsStore = Opcode == TargetOpcode::G_STORE;
1380 unsigned NewOpcode = getIndexedOpc(Opcode);
1381
1382 // If the offset constant didn't happen to dominate the load/store, we can
1383 // just clone it as needed.
1384 if (MatchInfo.RematOffset) {
1385 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1386 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1387 *OldCst->getOperand(1).getCImm());
1388 MatchInfo.Offset = NewCst.getReg(0);
1389 }
1390
1391 auto MIB = Builder.buildInstr(NewOpcode);
1392 if (IsStore) {
1393 MIB.addDef(MatchInfo.Addr);
1394 MIB.addUse(MI.getOperand(0).getReg());
1395 } else {
1396 MIB.addDef(MI.getOperand(0).getReg());
1397 MIB.addDef(MatchInfo.Addr);
1398 }
1399
1400 MIB.addUse(MatchInfo.Base);
1401 MIB.addUse(MatchInfo.Offset);
1402 MIB.addImm(MatchInfo.IsPre);
1403 MIB->cloneMemRefs(*MI.getMF(), MI);
1404 MI.eraseFromParent();
1405 AddrDef.eraseFromParent();
1406
  LLVM_DEBUG(dbgs() << "  Combined to indexed operation");
1408}
1409
bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) {
1412 unsigned Opcode = MI.getOpcode();
1413 bool IsDiv, IsSigned;
1414
1415 switch (Opcode) {
1416 default:
1417 llvm_unreachable("Unexpected opcode!");
1418 case TargetOpcode::G_SDIV:
1419 case TargetOpcode::G_UDIV: {
1420 IsDiv = true;
1421 IsSigned = Opcode == TargetOpcode::G_SDIV;
1422 break;
1423 }
1424 case TargetOpcode::G_SREM:
1425 case TargetOpcode::G_UREM: {
1426 IsDiv = false;
1427 IsSigned = Opcode == TargetOpcode::G_SREM;
1428 break;
1429 }
1430 }
1431
1432 Register Src1 = MI.getOperand(1).getReg();
1433 unsigned DivOpcode, RemOpcode, DivremOpcode;
1434 if (IsSigned) {
1435 DivOpcode = TargetOpcode::G_SDIV;
1436 RemOpcode = TargetOpcode::G_SREM;
1437 DivremOpcode = TargetOpcode::G_SDIVREM;
1438 } else {
1439 DivOpcode = TargetOpcode::G_UDIV;
1440 RemOpcode = TargetOpcode::G_UREM;
1441 DivremOpcode = TargetOpcode::G_UDIVREM;
1442 }
1443
1444 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1445 return false;
1446
1447 // Combine:
1448 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1449 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1450 // into:
1451 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1452
1453 // Combine:
1454 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1455 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1456 // into:
1457 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1458
1459 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1460 if (MI.getParent() == UseMI.getParent() &&
1461 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1462 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1463 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1464 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1465 OtherMI = &UseMI;
1466 return true;
1467 }
1468 }
1469
1470 return false;
1471}
1472
void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) {
1475 unsigned Opcode = MI.getOpcode();
1476 assert(OtherMI && "OtherMI shouldn't be empty.");
1477
1478 Register DestDivReg, DestRemReg;
1479 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1480 DestDivReg = MI.getOperand(0).getReg();
1481 DestRemReg = OtherMI->getOperand(0).getReg();
1482 } else {
1483 DestDivReg = OtherMI->getOperand(0).getReg();
1484 DestRemReg = MI.getOperand(0).getReg();
1485 }
1486
1487 bool IsSigned =
1488 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1489
1490 // Check which instruction is first in the block so we don't break def-use
1491 // deps by "moving" the instruction incorrectly. Also keep track of which
  // instruction is first so we pick its operands, avoiding use-before-def
1493 // bugs.
1494 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1495 Builder.setInstrAndDebugLoc(*FirstInst);
1496
1497 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1498 : TargetOpcode::G_UDIVREM,
1499 {DestDivReg, DestRemReg},
1500 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1501 MI.eraseFromParent();
1502 OtherMI->eraseFromParent();
1503}
1504
bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
                                                   MachineInstr *&BrCond) {
1507 assert(MI.getOpcode() == TargetOpcode::G_BR);
1508
1509 // Try to match the following:
1510 // bb1:
1511 // G_BRCOND %c1, %bb2
1512 // G_BR %bb3
1513 // bb2:
1514 // ...
1515 // bb3:
1516
1517 // The above pattern does not have a fall through to the successor bb2, always
1518 // resulting in a branch no matter which path is taken. Here we try to find
1519 // and replace that pattern with conditional branch to bb3 and otherwise
1520 // fallthrough to bb2. This is generally better for branch predictors.
1521
1522 MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator BrIt(MI);
  if (BrIt == MBB->begin())
1525 return false;
1526 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1527
1528 BrCond = &*std::prev(BrIt);
1529 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1530 return false;
1531
1532 // Check that the next block is the conditional branch target. Also make sure
1533 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1534 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1535 return BrCondTarget != MI.getOperand(0).getMBB() &&
1536 MBB->isLayoutSuccessor(BrCondTarget);
1537}
1538
void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
                                                   MachineInstr *&BrCond) {
  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  Builder.setInstrAndDebugLoc(*BrCond);
  LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1544 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1545 // this to i1 only since we might not know for sure what kind of
1546 // compare generated the condition value.
1547 auto True = Builder.buildConstant(
1548 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1549 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1550
1551 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
  Observer.changingInstr(MI);
  MI.getOperand(0).setMBB(FallthroughBB);
  Observer.changedInstr(MI);

1556 // Change the conditional branch to use the inverted condition and
1557 // new target block.
1558 Observer.changingInstr(*BrCond);
1559 BrCond->getOperand(0).setReg(Xor.getReg(0));
1560 BrCond->getOperand(1).setMBB(BrTarget);
1561 Observer.changedInstr(*BrCond);
1562}
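// Illustrative MIR for the branch inversion applied above (not from the
// original source):
//   bb1:
//     G_BRCOND %c(s1), %bb2
//     G_BR %bb3
// where bb2 is the layout successor, becomes
//   bb1:
//     %true:_(s1) = G_CONSTANT (ICmp true value)
//     %nc:_(s1) = G_XOR %c, %true
//     G_BRCOND %nc(s1), %bb3
//     G_BR %bb2
// so the common path falls through to bb2.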
1563
1564
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
  MachineIRBuilder HelperBuilder(MI);
1567 GISelObserverWrapper DummyObserver;
1568 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1569 return Helper.lowerMemcpyInline(MI) ==
         LegalizerHelper::LegalizeResult::Legalized;
}
1572
bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  MachineIRBuilder HelperBuilder(MI);
1575 GISelObserverWrapper DummyObserver;
1576 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1577 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
         LegalizerHelper::LegalizeResult::Legalized;
}
1580
static APFloat constantFoldFpUnary(const MachineInstr &MI,
                                   const MachineRegisterInfo &MRI,
                                   const APFloat &Val) {
1584 APFloat Result(Val);
1585 switch (MI.getOpcode()) {
1586 default:
1587 llvm_unreachable("Unexpected opcode!");
1588 case TargetOpcode::G_FNEG: {
1589 Result.changeSign();
1590 return Result;
1591 }
1592 case TargetOpcode::G_FABS: {
1593 Result.clearSign();
1594 return Result;
1595 }
1596 case TargetOpcode::G_FPTRUNC: {
1597 bool Unused;
1598 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
                   &Unused);
1601 return Result;
1602 }
1603 case TargetOpcode::G_FSQRT: {
1604 bool Unused;
    Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                   &Unused);
1607 Result = APFloat(sqrt(Result.convertToDouble()));
1608 break;
1609 }
1610 case TargetOpcode::G_FLOG2: {
1611 bool Unused;
    Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                   &Unused);
1614 Result = APFloat(log2(Result.convertToDouble()));
1615 break;
1616 }
1617 }
1618 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1619 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and
1620 // `G_FLOG2` reach here.
1621 bool Unused;
1622 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1623 return Result;
1624}
1625
void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
                                                     const ConstantFP *Cst) {
1628 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1629 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1630 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1631 MI.eraseFromParent();
1632}
1633
bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
1636 // We're trying to match the following pattern:
1637 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1638 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1639 // -->
1640 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1641
1642 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1643 return false;
1644
1645 Register Add2 = MI.getOperand(1).getReg();
1646 Register Imm1 = MI.getOperand(2).getReg();
1647 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1648 if (!MaybeImmVal)
1649 return false;
1650
1651 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1652 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1653 return false;
1654
1655 Register Base = Add2Def->getOperand(1).getReg();
1656 Register Imm2 = Add2Def->getOperand(2).getReg();
1657 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1658 if (!MaybeImm2Val)
1659 return false;
1660
1661 // Check if the new combined immediate forms an illegal addressing mode.
1662 // Do not combine if it was legal before but would get illegal.
1663 // To do so, we need to find a load/store user of the pointer to get
1664 // the access type.
1665 Type *AccessTy = nullptr;
1666 auto &MF = *MI.getMF();
1667 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1668 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1669 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1670 MF.getFunction().getContext());
1671 break;
1672 }
1673 }
  TargetLoweringBase::AddrMode AMNew;
  APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1676 AMNew.BaseOffs = CombinedImm.getSExtValue();
1677 if (AccessTy) {
1678 AMNew.HasBaseReg = true;
    TargetLoweringBase::AddrMode AMOld;
    AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1681 AMOld.HasBaseReg = true;
1682 unsigned AS = MRI.getType(Add2).getAddressSpace();
1683 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1684 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1685 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1686 return false;
1687 }
1688
1689 // Pass the combined immediate to the apply function.
1690 MatchInfo.Imm = AMNew.BaseOffs;
1691 MatchInfo.Base = Base;
1692 MatchInfo.Bank = getRegBank(Imm2);
1693 return true;
1694}
1695
void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
1698 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1699 MachineIRBuilder MIB(MI);
1700 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1701 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1702 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Base);
  MI.getOperand(2).setReg(NewOffset.getReg(0));
  Observer.changedInstr(MI);
}
1708
bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
                                          RegisterImmPair &MatchInfo) {
1711 // We're trying to match the following pattern with any of
1712 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1713 // %t1 = SHIFT %base, G_CONSTANT imm1
1714 // %root = SHIFT %t1, G_CONSTANT imm2
1715 // -->
1716 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1717
1718 unsigned Opcode = MI.getOpcode();
1719 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1720 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1721 Opcode == TargetOpcode::G_USHLSAT) &&
1722 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1723
1724 Register Shl2 = MI.getOperand(1).getReg();
1725 Register Imm1 = MI.getOperand(2).getReg();
1726 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1727 if (!MaybeImmVal)
1728 return false;
1729
1730 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1731 if (Shl2Def->getOpcode() != Opcode)
1732 return false;
1733
1734 Register Base = Shl2Def->getOperand(1).getReg();
1735 Register Imm2 = Shl2Def->getOperand(2).getReg();
1736 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1737 if (!MaybeImm2Val)
1738 return false;
1739
1740 // Pass the combined immediate to the apply function.
1741 MatchInfo.Imm =
1742 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1743 MatchInfo.Reg = Base;
1744
1745 // There is no simple replacement for a saturating unsigned left shift that
1746 // exceeds the scalar size.
1747 if (Opcode == TargetOpcode::G_USHLSAT &&
1748 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1749 return false;
1750
1751 return true;
1752}
1753
1754void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1755 RegisterImmPair &MatchInfo) {
1756 unsigned Opcode = MI.getOpcode();
1757 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1758 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1759 Opcode == TargetOpcode::G_USHLSAT) &&
1760 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1761
1762 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1763 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1764 auto Imm = MatchInfo.Imm;
1765
1766 if (Imm >= ScalarSizeInBits) {
1767 // Any logical shift that exceeds scalar size will produce zero.
1768 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1769 Builder.buildConstant(MI.getOperand(0), 0);
1770 MI.eraseFromParent();
1771 return;
1772 }
1773 // Arithmetic shift and saturating signed left shift have no effect beyond
1774 // scalar size.
1775 Imm = ScalarSizeInBits - 1;
1776 }
1777
1778 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1779 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1780 Observer.changingInstr(MI);
1781 MI.getOperand(1).setReg(MatchInfo.Reg);
1782 MI.getOperand(2).setReg(NewImm);
1783 Observer.changedInstr(MI);
1784}
1785
1786bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1787 ShiftOfShiftedLogic &MatchInfo) {
1788 // We're trying to match the following pattern with any of
1789 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1790 // with any of G_AND/G_OR/G_XOR logic instructions.
1791 // %t1 = SHIFT %X, G_CONSTANT C0
1792 // %t2 = LOGIC %t1, %Y
1793 // %root = SHIFT %t2, G_CONSTANT C1
1794 // -->
1795 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1796 // %t4 = SHIFT %Y, G_CONSTANT C1
1797 // %root = LOGIC %t3, %t4
1798 unsigned ShiftOpcode = MI.getOpcode();
1799 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1800 ShiftOpcode == TargetOpcode::G_ASHR ||
1801 ShiftOpcode == TargetOpcode::G_LSHR ||
1802 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1803 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1804 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1805
1806 // Match a one-use bitwise logic op.
1807 Register LogicDest = MI.getOperand(1).getReg();
1808 if (!MRI.hasOneNonDBGUse(LogicDest))
1809 return false;
1810
1811 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1812 unsigned LogicOpcode = LogicMI->getOpcode();
1813 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1814 LogicOpcode != TargetOpcode::G_XOR)
1815 return false;
1816
1817 // Find a matching one-use shift by constant.
1818 const Register C1 = MI.getOperand(2).getReg();
1819 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1820 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1821 return false;
1822
1823 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1824
1825 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1826 // Shift should match the previous one and must have only one use.
1827 if (MI->getOpcode() != ShiftOpcode ||
1828 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1829 return false;
1830
1831 // Must be a constant.
1832 auto MaybeImmVal =
1833 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1834 if (!MaybeImmVal)
1835 return false;
1836
1837 ShiftVal = MaybeImmVal->Value.getSExtValue();
1838 return true;
1839 };
1840
1841 // Logic ops are commutative, so check each operand for a match.
1842 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1843 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1844 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1845 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1846 uint64_t C0Val;
1847
1848 if (matchFirstShift(LogicMIOp1, C0Val)) {
1849 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1850 MatchInfo.Shift2 = LogicMIOp1;
1851 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1852 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1853 MatchInfo.Shift2 = LogicMIOp2;
1854 } else
1855 return false;
1856
1857 MatchInfo.ValSum = C0Val + C1Val;
1858
1859 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1860 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1861 return false;
1862
1863 MatchInfo.Logic = LogicMI;
1864 return true;
1865}
1866
1867void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1868 ShiftOfShiftedLogic &MatchInfo) {
1869 unsigned Opcode = MI.getOpcode();
1870 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1871 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1872 Opcode == TargetOpcode::G_SSHLSAT) &&
1873 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1874
1875 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1876 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1877
1878 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1879
1880 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1881 Register Shift1 =
1882 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1883
1884 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
1885 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
1886 // shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at the
1887 // end would actually erase the reused shift1 and cause a crash later. Erase
1888 // it here, before building shift2, to avoid that.
1889 MatchInfo.Shift2->eraseFromParent();
1890
1891 Register Shift2Const = MI.getOperand(2).getReg();
1892 Register Shift2 = Builder
1893 .buildInstr(Opcode, {DestType},
1894 {MatchInfo.LogicNonShiftReg, Shift2Const})
1895 .getReg(0);
1896
1897 Register Dest = MI.getOperand(0).getReg();
1898 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1899
1900 // This was one use so it's safe to remove it.
1901 MatchInfo.Logic->eraseFromParent();
1902
1903 MI.eraseFromParent();
1904}
1905
1906bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
1907 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1908 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1909 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
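  // Illustrative instance (made-up constants): with c1 = 1 and c2 = 4,
  //   %t = G_ADD %x, 1
  //   %root = G_SHL %t, 4
  // is rewritten to
  //   %s = G_SHL %x, 4
  //   %root = G_ADD %s, 16   ; 1 << 4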
1910 auto &Shl = cast<GenericMachineInstr>(MI);
1911 Register DstReg = Shl.getReg(0);
1912 Register SrcReg = Shl.getReg(1);
1913 Register ShiftReg = Shl.getReg(2);
1914 Register X, C1;
1915
1916 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1917 return false;
1918
1919 if (!mi_match(SrcReg, MRI,
1920 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1921 m_GOr(m_Reg(X), m_Reg(C1))))))
1922 return false;
1923
1924 APInt C1Val, C2Val;
1925 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
1926 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
1927 return false;
1928
1929 auto *SrcDef = MRI.getVRegDef(SrcReg);
1930 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
1931 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
1932 LLT SrcTy = MRI.getType(SrcReg);
1933 MatchInfo = [=](MachineIRBuilder &B) {
1934 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
1935 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
1936 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
1937 };
1938 return true;
1939}
1940
1941bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1942 unsigned &ShiftVal) {
1943 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1944 auto MaybeImmVal =
1945 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1946 if (!MaybeImmVal)
1947 return false;
1948
1949 ShiftVal = MaybeImmVal->Value.exactLogBase2();
1950 return (static_cast<int32_t>(ShiftVal) != -1);
1951}
1952
1953void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1954 unsigned &ShiftVal) {
1955 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1956 MachineIRBuilder MIB(MI);
1957 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1958 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1959 Observer.changingInstr(MI);
1960 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1961 MI.getOperand(2).setReg(ShiftCst.getReg(0));
1962 Observer.changedInstr(MI);
1963}
1964
1965// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
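// Illustrative instance (made-up types; assumes %x has enough known leading
// zeros for the shift):
//   %e:_(s32) = G_ZEXT %x:_(s8)
//   %d:_(s32) = G_SHL %e, 2
// can instead shift in the narrow type and extend the result:
//   %n:_(s8)  = G_SHL %x, 2
//   %d:_(s32) = G_ZEXT %n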
1966bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
1967 RegisterImmPair &MatchData) {
1968 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1969 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
1970 return false;
1971
1972 Register LHS = MI.getOperand(1).getReg();
1973
1974 Register ExtSrc;
1975 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1976 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1977 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1978 return false;
1979
1980 Register RHS = MI.getOperand(2).getReg();
1981 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
1982 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
1983 if (!MaybeShiftAmtVal)
1984 return false;
1985
1986 if (LI) {
1987 LLT SrcTy = MRI.getType(ExtSrc);
1988
1989 // We only really care about the legality of the shifted value. We can
1990 // pick any type for the constant shift amount, so ask the target what to
1991 // use. Otherwise we would have to guess and hope it is reported as legal.
1992 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1993 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1994 return false;
1995 }
1996
1997 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
1998 MatchData.Reg = ExtSrc;
1999 MatchData.Imm = ShiftAmt;
2000
2001 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
2002 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2003 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2004}
2005
2006void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
2007 const RegisterImmPair &MatchData) {
2008 Register ExtSrcReg = MatchData.Reg;
2009 int64_t ShiftAmtVal = MatchData.Imm;
2010
2011 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2012 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2013 auto NarrowShift =
2014 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2015 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2016 MI.eraseFromParent();
2017}
2018
2019bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2020 Register &MatchInfo) {
2021 GMerge &Merge = cast<GMerge>(MI);
2022 SmallVector<Register, 16> MergedValues;
2023 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2024 MergedValues.emplace_back(Merge.getSourceReg(I));
2025
2026 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2027 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2028 return false;
2029
2030 for (unsigned I = 0; I < MergedValues.size(); ++I)
2031 if (MergedValues[I] != Unmerge->getReg(I))
2032 return false;
2033
2034 MatchInfo = Unmerge->getSourceReg();
2035 return true;
2036}
2037
2038static Register peekThroughBitcast(Register Reg,
2039 const MachineRegisterInfo &MRI) {
2040 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2041 ;
2042
2043 return Reg;
2044}
2045
2046bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2047 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2048 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2049 "Expected an unmerge");
2050 auto &Unmerge = cast<GUnmerge>(MI);
2051 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2052
2053 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2054 if (!SrcInstr)
2055 return false;
2056
2057 // Check the source type of the merge.
2058 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2059 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2060 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2061 if (SrcMergeTy != Dst0Ty && !SameSize)
2062 return false;
2063 // They are the same now (modulo a bitcast).
2064 // We can collect all the src registers.
2065 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2066 Operands.push_back(SrcInstr->getSourceReg(Idx));
2067 return true;
2068}
2069
2070void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2071 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2072 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2073 "Expected an unmerge");
2074 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2075 "Not enough operands to replace all defs");
2076 unsigned NumElems = MI.getNumOperands() - 1;
2077
2078 LLT SrcTy = MRI.getType(Operands[0]);
2079 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2080 bool CanReuseInputDirectly = DstTy == SrcTy;
2081 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2082 Register DstReg = MI.getOperand(Idx).getReg();
2083 Register SrcReg = Operands[Idx];
2084
2085 // This combine may run after RegBankSelect, so we need to be aware of
2086 // register banks.
2087 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2088 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2089 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2090 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2091 }
2092
2093 if (CanReuseInputDirectly)
2094 replaceRegWith(MRI, DstReg, SrcReg);
2095 else
2096 Builder.buildCast(DstReg, SrcReg);
2097 }
2098 MI.eraseFromParent();
2099}
2100
2101bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2102 SmallVectorImpl<APInt> &Csts) {
2103 unsigned SrcIdx = MI.getNumOperands() - 1;
2104 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2105 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2106 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2107 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2108 return false;
2109 // Break down the big constant in smaller ones.
2110 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2111 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2112 ? CstVal.getCImm()->getValue()
2113 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2114
2115 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2116 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2117 // Unmerge a constant.
2118 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2119 Csts.emplace_back(Val.trunc(ShiftAmt));
2120 Val = Val.lshr(ShiftAmt);
2121 }
2122
2123 return true;
2124}
2125
2126void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2127 SmallVectorImpl<APInt> &Csts) {
2128 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2129 "Expected an unmerge");
2130 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2131 "Not enough operands to replace all defs");
2132 unsigned NumElems = MI.getNumOperands() - 1;
2133 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2134 Register DstReg = MI.getOperand(Idx).getReg();
2135 Builder.buildConstant(DstReg, Csts[Idx]);
2136 }
2137
2138 MI.eraseFromParent();
2139}
2140
2141bool CombinerHelper::matchCombineUnmergeUndef(
2142 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2143 unsigned SrcIdx = MI.getNumOperands() - 1;
2144 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2145 MatchInfo = [&MI](MachineIRBuilder &B) {
2146 unsigned NumElems = MI.getNumOperands() - 1;
2147 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2148 Register DstReg = MI.getOperand(Idx).getReg();
2149 B.buildUndef(DstReg);
2150 }
2151 };
2152 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2153}
2154
2155bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2156 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2157 "Expected an unmerge");
2158 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2159 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2160 return false;
2161 // Check that all the lanes are dead except the first one.
2162 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2163 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2164 return false;
2165 }
2166 return true;
2167}
2168
2169void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2170 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2171 Register Dst0Reg = MI.getOperand(0).getReg();
2172 Builder.buildTrunc(Dst0Reg, SrcReg);
2173 MI.eraseFromParent();
2174}
2175
2176bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2177 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2178 "Expected an unmerge");
2179 Register Dst0Reg = MI.getOperand(0).getReg();
2180 LLT Dst0Ty = MRI.getType(Dst0Reg);
2181 // G_ZEXT on vector applies to each lane, so it will
2182 // affect all destinations. Therefore we won't be able
2183 // to simplify the unmerge to just the first definition.
2184 if (Dst0Ty.isVector())
2185 return false;
2186 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2187 LLT SrcTy = MRI.getType(SrcReg);
2188 if (SrcTy.isVector())
2189 return false;
2190
2191 Register ZExtSrcReg;
2192 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2193 return false;
2194
2195 // Finally we can replace the first definition with
2196 // a zext of the source if the definition is big enough to hold
2197 // all of ZExtSrc bits.
2198 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2199 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2200}
2201
2202void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2203 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2204 "Expected an unmerge");
2205
2206 Register Dst0Reg = MI.getOperand(0).getReg();
2207
2208 MachineInstr *ZExtInstr =
2209 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2210 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2211 "Expecting a G_ZEXT");
2212
2213 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2214 LLT Dst0Ty = MRI.getType(Dst0Reg);
2215 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2216
2217 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2218 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2219 } else {
2220 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2221 "ZExt src doesn't fit in destination");
2222 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2223 }
2224
2225 Register ZeroReg;
2226 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2227 if (!ZeroReg)
2228 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2229 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2230 }
2231 MI.eraseFromParent();
2232}
2233
2234bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2235 unsigned TargetShiftSize,
2236 unsigned &ShiftVal) {
2237 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2238 MI.getOpcode() == TargetOpcode::G_LSHR ||
2239 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2240
2241 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2242 if (Ty.isVector()) // TODO:
2243 return false;
2244
2245 // Don't narrow further than the requested size.
2246 unsigned Size = Ty.getSizeInBits();
2247 if (Size <= TargetShiftSize)
2248 return false;
2249
2250 auto MaybeImmVal =
2251 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2252 if (!MaybeImmVal)
2253 return false;
2254
2255 ShiftVal = MaybeImmVal->Value.getSExtValue();
2256 return ShiftVal >= Size / 2 && ShiftVal < Size;
2257}
2258
2259void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2260 const unsigned &ShiftVal) {
2261 Register DstReg = MI.getOperand(0).getReg();
2262 Register SrcReg = MI.getOperand(1).getReg();
2263 LLT Ty = MRI.getType(SrcReg);
2264 unsigned Size = Ty.getSizeInBits();
2265 unsigned HalfSize = Size / 2;
2266 assert(ShiftVal >= HalfSize);
2267
2268 LLT HalfTy = LLT::scalar(HalfSize);
2269
2270 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2271 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2272
2273 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2274 Register Narrowed = Unmerge.getReg(1);
2275
2276 // dst = G_LSHR s64:x, C for C >= 32
2277 // =>
2278 // lo, hi = G_UNMERGE_VALUES x
2279 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2280
2281 if (NarrowShiftAmt != 0) {
2282 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2283 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2284 }
2285
2286 auto Zero = Builder.buildConstant(HalfTy, 0);
2287 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2288 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2289 Register Narrowed = Unmerge.getReg(0);
2290 // dst = G_SHL s64:x, C for C >= 32
2291 // =>
2292 // lo, hi = G_UNMERGE_VALUES x
2293 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2294 if (NarrowShiftAmt != 0) {
2295 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2296 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2297 }
2298
2299 auto Zero = Builder.buildConstant(HalfTy, 0);
2300 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2301 } else {
2302 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2303 auto Hi = Builder.buildAShr(
2304 HalfTy, Unmerge.getReg(1),
2305 Builder.buildConstant(HalfTy, HalfSize - 1));
2306
2307 if (ShiftVal == HalfSize) {
2308 // (G_ASHR i64:x, 32) ->
2309 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2310 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2311 } else if (ShiftVal == Size - 1) {
2312 // Don't need a second shift.
2313 // (G_ASHR i64:x, 63) ->
2314 // %narrowed = (G_ASHR hi_32(x), 31)
2315 // G_MERGE_VALUES %narrowed, %narrowed
2316 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2317 } else {
2318 auto Lo = Builder.buildAShr(
2319 HalfTy, Unmerge.getReg(1),
2320 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2321
2322 // (G_ASHR i64:x, C) ->, for C >= 32
2323 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2324 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2325 }
2326 }
2327
2328 MI.eraseFromParent();
2329}
2330
2331bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2332 unsigned TargetShiftAmount) {
2333 unsigned ShiftAmt;
2334 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2335 applyCombineShiftToUnmerge(MI, ShiftAmt);
2336 return true;
2337 }
2338
2339 return false;
2340}
2341
2342bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2343 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2344 Register DstReg = MI.getOperand(0).getReg();
2345 LLT DstTy = MRI.getType(DstReg);
2346 Register SrcReg = MI.getOperand(1).getReg();
2347 return mi_match(SrcReg, MRI,
2348 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2349}
2350
2351void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2352 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2353 Register DstReg = MI.getOperand(0).getReg();
2354 Builder.buildCopy(DstReg, Reg);
2355 MI.eraseFromParent();
2356}
2357
2358void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2359 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2360 Register DstReg = MI.getOperand(0).getReg();
2361 Builder.buildZExtOrTrunc(DstReg, Reg);
2362 MI.eraseFromParent();
2363}
2364
2365bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2366 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2367 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2368 Register LHS = MI.getOperand(1).getReg();
2369 Register RHS = MI.getOperand(2).getReg();
2370 LLT IntTy = MRI.getType(LHS);
2371
2372 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2373 // instruction.
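  // Illustrative instance (made-up operands): this match enables rewriting
  //   %i:_(s64) = G_PTRTOINT %p:_(p0)
  //   %a:_(s64) = G_ADD %x, %i
  // as
  //   %q:_(p0)  = G_PTR_ADD %p, %x
  //   %a:_(s64) = G_PTRTOINT %q
  // with PtrReg.second recording whether the pointer was found on the RHS.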
2374 PtrReg.second = false;
2375 for (Register SrcReg : {LHS, RHS}) {
2376 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2377 // Don't handle cases where the integer is implicitly converted to the
2378 // pointer width.
2379 LLT PtrTy = MRI.getType(PtrReg.first);
2380 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2381 return true;
2382 }
2383
2384 PtrReg.second = true;
2385 }
2386
2387 return false;
2388}
2389
2390void CombinerHelper::applyCombineAddP2IToPtrAdd(
2391 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2392 Register Dst = MI.getOperand(0).getReg();
2393 Register LHS = MI.getOperand(1).getReg();
2394 Register RHS = MI.getOperand(2).getReg();
2395
2396 const bool DoCommute = PtrReg.second;
2397 if (DoCommute)
2398 std::swap(LHS, RHS);
2399 LHS = PtrReg.first;
2400
2401 LLT PtrTy = MRI.getType(LHS);
2402
2403 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2404 Builder.buildPtrToInt(Dst, PtrAdd);
2405 MI.eraseFromParent();
2406}
2407
2408bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2409 APInt &NewCst) {
2410 auto &PtrAdd = cast<GPtrAdd>(MI);
2411 Register LHS = PtrAdd.getBaseReg();
2412 Register RHS = PtrAdd.getOffsetReg();
2413 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2414
2415 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2416 APInt Cst;
2417 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2418 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2419 // G_INTTOPTR uses zero-extension
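  // Worked example (illustrative values): with 32-bit pointers,
  //   %p = G_INTTOPTR 0xFFFFFFF0
  //   %q = G_PTR_ADD %p, 0x20
  // folds to the constant 0xFFFFFFF0 + 0x20 = 0x10 (modulo 2^32).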
2420 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2421 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2422 return true;
2423 }
2424 }
2425
2426 return false;
2427}
2428
2429void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2430 APInt &NewCst) {
2431 auto &PtrAdd = cast<GPtrAdd>(MI);
2432 Register Dst = PtrAdd.getReg(0);
2433
2434 Builder.buildConstant(Dst, NewCst);
2435 PtrAdd.eraseFromParent();
2436}
2437
2438bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2439 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2440 Register DstReg = MI.getOperand(0).getReg();
2441 Register SrcReg = MI.getOperand(1).getReg();
2442 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2443 if (OriginalSrcReg.isValid())
2444 SrcReg = OriginalSrcReg;
2445 LLT DstTy = MRI.getType(DstReg);
2446 return mi_match(SrcReg, MRI,
2447 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2448}
2449
2450bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2451 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2452 Register DstReg = MI.getOperand(0).getReg();
2453 Register SrcReg = MI.getOperand(1).getReg();
2454 LLT DstTy = MRI.getType(DstReg);
2455 if (mi_match(SrcReg, MRI,
2456 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2457 unsigned DstSize = DstTy.getScalarSizeInBits();
2458 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2459 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2460 }
2461 return false;
2462}
2463
2464bool CombinerHelper::matchCombineExtOfExt(
2465 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2466 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2467 MI.getOpcode() == TargetOpcode::G_SEXT ||
2468 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2469 "Expected a G_[ASZ]EXT");
2470 Register SrcReg = MI.getOperand(1).getReg();
2471 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2472 if (OriginalSrcReg.isValid())
2473 SrcReg = OriginalSrcReg;
2474 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2475 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
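  // For instance (illustrative): %a = G_SEXT (G_ZEXT %x) can be rebuilt as a
  // single G_ZEXT of %x, and %a = G_ANYEXT (G_SEXT %x) as a single G_SEXT of
  // %x; the inner source register and its extend opcode are recorded here.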
2476 unsigned Opc = MI.getOpcode();
2477 unsigned SrcOpc = SrcMI->getOpcode();
2478 if (Opc == SrcOpc ||
2479 (Opc == TargetOpcode::G_ANYEXT &&
2480 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2481 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2482 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2483 return true;
2484 }
2485 return false;
2486}
2487
2488void CombinerHelper::applyCombineExtOfExt(
2489 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2490 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2491 MI.getOpcode() == TargetOpcode::G_SEXT ||
2492 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2493 "Expected a G_[ASZ]EXT");
2494
2495 Register Reg = std::get<0>(MatchInfo);
2496 unsigned SrcExtOp = std::get<1>(MatchInfo);
2497
2498 // Combine exts with the same opcode.
2499 if (MI.getOpcode() == SrcExtOp) {
2500 Observer.changingInstr(MI);
2501 MI.getOperand(1).setReg(Reg);
2502 Observer.changedInstr(MI);
2503 return;
2504 }
2505
2506 // Combine:
2507 // - anyext([sz]ext x) to [sz]ext x
2508 // - sext(zext x) to zext x
2509 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2510 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2511 SrcExtOp == TargetOpcode::G_ZEXT)) {
2512 Register DstReg = MI.getOperand(0).getReg();
2513 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2514 MI.eraseFromParent();
2515 }
2516}
2517
2518bool CombinerHelper::matchCombineTruncOfExt(
2519 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2520 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2521 Register SrcReg = MI.getOperand(1).getReg();
2522 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2523 unsigned SrcOpc = SrcMI->getOpcode();
2524 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2525 SrcOpc == TargetOpcode::G_ZEXT) {
2526 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2527 return true;
2528 }
2529 return false;
2530}
2531
2532void CombinerHelper::applyCombineTruncOfExt(
2533 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2534 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2535 Register SrcReg = MatchInfo.first;
2536 unsigned SrcExtOp = MatchInfo.second;
2537 Register DstReg = MI.getOperand(0).getReg();
2538 LLT SrcTy = MRI.getType(SrcReg);
2539 LLT DstTy = MRI.getType(DstReg);
2540 if (SrcTy == DstTy) {
2541 MI.eraseFromParent();
2542 replaceRegWith(MRI, DstReg, SrcReg);
2543 return;
2544 }
2545 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2546 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2547 else
2548 Builder.buildTrunc(DstReg, SrcReg);
2549 MI.eraseFromParent();
2550}
2551
2552static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2553 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2554 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2555
2556 // ShiftTy > 32 > TruncTy -> 32
2557 if (ShiftSize > 32 && TruncSize < 32)
2558 return ShiftTy.changeElementSize(32);
2559
2560 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2561 // Some targets like it, some don't, some only like it under certain
2562 // conditions/processor versions, etc.
2563 // A TL hook might be needed for this.
2564
2565 // Don't combine
2566 return ShiftTy;
2567}
2568
2569bool CombinerHelper::matchCombineTruncOfShift(
2570 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2571 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2572 Register DstReg = MI.getOperand(0).getReg();
2573 Register SrcReg = MI.getOperand(1).getReg();
2574
2575 if (!MRI.hasOneNonDBGUse(SrcReg))
2576 return false;
2577
2578 LLT SrcTy = MRI.getType(SrcReg);
2579 LLT DstTy = MRI.getType(DstReg);
2580
2581 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2582 const auto &TL = getTargetLowering();
2583
2584 LLT NewShiftTy;
2585 switch (SrcMI->getOpcode()) {
2586 default:
2587 return false;
2588 case TargetOpcode::G_SHL: {
2589 NewShiftTy = DstTy;
2590
2591 // Make sure new shift amount is legal.
2592 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2593 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2594 return false;
2595 break;
2596 }
2597 case TargetOpcode::G_LSHR:
2598 case TargetOpcode::G_ASHR: {
2599 // For right shifts, we conservatively do not do the transform if the TRUNC
2600 // has any STORE users. The reason is that if we change the type of the
2601 // shift, we may break the truncstore combine.
2602 //
2603 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2604 for (auto &User : MRI.use_instructions(DstReg))
2605 if (User.getOpcode() == TargetOpcode::G_STORE)
2606 return false;
2607
2608 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2609 if (NewShiftTy == SrcTy)
2610 return false;
2611
2612 // Make sure we won't lose information by truncating the high bits.
2613 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2614 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2615 DstTy.getScalarSizeInBits()))
2616 return false;
2617 break;
2618 }
2619 }
2620
2621 if (!isLegalOrBeforeLegalizer(
2622 {SrcMI->getOpcode(),
2623 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2624 return false;
2625
2626 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2627 return true;
2628}
2629
2630void CombinerHelper::applyCombineTruncOfShift(
2631 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2632 MachineInstr *ShiftMI = MatchInfo.first;
2633 LLT NewShiftTy = MatchInfo.second;
2634
2635 Register Dst = MI.getOperand(0).getReg();
2636 LLT DstTy = MRI.getType(Dst);
2637
2638 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2639 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2640 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2641
2642 Register NewShift =
2643 Builder
2644 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2645 .getReg(0);
2646
2647 if (NewShiftTy == DstTy)
2648 replaceRegWith(MRI, Dst, NewShift);
2649 else
2650 Builder.buildTrunc(Dst, NewShift);
2651
2652 eraseInst(MI);
2653}
2654
2656 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2657 return MO.isReg() &&
2658 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2659 });
2660}
2661
2663 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2664 return !MO.isReg() ||
2665 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2666 });
2667}
2668
2670 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2671 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2672 return all_of(Mask, [](int Elt) { return Elt < 0; });
2673}
2674
2676 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2677 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2678 MRI);
2679}
2680
2682 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2683 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2684 MRI);
2685}
2686
2688 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2689 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2690 "Expected an insert/extract element op");
2691 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2692 unsigned IdxIdx =
2693 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2694 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2695 if (!Idx)
2696 return false;
2697 return Idx->getZExtValue() >= VecTy.getNumElements();
2698}
2699
2700bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2701 GSelect &SelMI = cast<GSelect>(MI);
2702 auto Cst =
2703 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2704 if (!Cst)
2705 return false;
2706 OpIdx = Cst->isZero() ? 3 : 2;
2707 return true;
2708}
2709
2710void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2711
2712bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2713 const MachineOperand &MOP2) {
2714 if (!MOP1.isReg() || !MOP2.isReg())
2715 return false;
2716 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2717 if (!InstAndDef1)
2718 return false;
2719 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2720 if (!InstAndDef2)
2721 return false;
2722 MachineInstr *I1 = InstAndDef1->MI;
2723 MachineInstr *I2 = InstAndDef2->MI;
2724
2725 // Handle a case like this:
2726 //
2727 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2728 //
2729 // Even though %0 and %1 are produced by the same instruction they are not
2730 // the same values.
2731 if (I1 == I2)
2732 return MOP1.getReg() == MOP2.getReg();
2733
2734 // If we have an instruction which loads or stores, we can't guarantee that
2735 // it is identical.
2736 //
2737 // For example, we may have
2738 //
2739 // %x1 = G_LOAD %addr (load N from @somewhere)
2740 // ...
2741 // call @foo
2742 // ...
2743 // %x2 = G_LOAD %addr (load N from @somewhere)
2744 // ...
2745 // %or = G_OR %x1, %x2
2746 //
2747 // It's possible that @foo will modify whatever lives at the address we're
2748 // loading from. To be safe, let's just assume that all loads and stores
2749 // are different (unless we have something which is guaranteed to not
2750 // change.)
2751 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2752 return false;
2753
2754 // If both instructions are loads or stores, they are equal only if both
2755 // are dereferenceable invariant loads with the same number of bits.
2756 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2757 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2758 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2759 if (!LS1 || !LS2)
2760 return false;
2761
2762 if (!I2->isDereferenceableInvariantLoad() ||
2763 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2764 return false;
2765 }
2766
2767 // Check for physical registers on the instructions first to avoid cases
2768 // like this:
2769 //
2770 // %a = COPY $physreg
2771 // ...
2772 // SOMETHING implicit-def $physreg
2773 // ...
2774 // %b = COPY $physreg
2775 //
2776 // These copies are not equivalent.
2777 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2778 return MO.isReg() && MO.getReg().isPhysical();
2779 })) {
2780 // Check if we have a case like this:
2781 //
2782 // %a = COPY $physreg
2783 // %b = COPY %a
2784 //
2785 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2786 // From that, we know that they must have the same value, since they must
2787 // have come from the same COPY.
2788 return I1->isIdenticalTo(*I2);
2789 }
2790
2791 // We don't have any physical registers, so we don't necessarily need the
2792 // same vreg defs.
2793 //
2794 // On the off-chance that there's some target instruction feeding into the
2795 // instruction, let's use produceSameValue instead of isIdenticalTo.
2796 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2797 // Handle instructions with multiple defs that produce same values. Values
2798 // are same for operands with same index.
2799 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2800 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2801 // I1 and I2 are different instructions but produce same values,
2802 // %1 and %6 are same, %1 and %7 are not the same value.
2803 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2804 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2805 }
2806 return false;
2807}
2808
2810 if (!MOP.isReg())
2811 return false;
2812 auto *MI = MRI.getVRegDef(MOP.getReg());
2813 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2814 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2815 MaybeCst->getSExtValue() == C;
2816}
2817
2819 if (!MOP.isReg())
2820 return false;
2821 std::optional<FPValueAndVReg> MaybeCst;
2822 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2823 return false;
2824
2825 return MaybeCst->Value.isExactlyValue(C);
2826}
2827
2829 unsigned OpIdx) {
2830 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2831 Register OldReg = MI.getOperand(0).getReg();
2832 Register Replacement = MI.getOperand(OpIdx).getReg();
2833 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2834 MI.eraseFromParent();
2835 replaceRegWith(MRI, OldReg, Replacement);
2836}
2837
2839 Register Replacement) {
2840 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2841 Register OldReg = MI.getOperand(0).getReg();
2842 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2843 MI.eraseFromParent();
2844 replaceRegWith(MRI, OldReg, Replacement);
2845}
2846
2848 unsigned ConstIdx) {
2849 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2850 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2851
2852 // Get the shift amount
2853 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2854 if (!VRegAndVal)
2855 return false;
2856
2857 // Return true if the shift amount is >= the bitwidth.
2858 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2859}
2860
2861void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
2862 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2863 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2864 "This is not a funnel shift operation");
2865
2866 Register ConstReg = MI.getOperand(3).getReg();
2867 LLT ConstTy = MRI.getType(ConstReg);
2868 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2869
2870 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2871 assert((VRegAndVal) && "Value is not a constant");
2872
2873 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
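  // Worked example (illustrative): for a 32-bit G_FSHL with a constant shift
  // amount of 37, the funnel shift rebuilt below uses 37 % 32 = 5.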
2874 APInt NewConst = VRegAndVal->Value.urem(
2875 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2876
2877 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2878 Builder.buildInstr(
2879 MI.getOpcode(), {MI.getOperand(0)},
2880 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2881
2882 MI.eraseFromParent();
2883}
2884
2886 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2887 // Match (cond ? x : x)
2888 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2889 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2890 MRI);
2891}
2892
2894 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2895 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2896 MRI);
2897}
2898
2900 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2901 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2902 MRI);
2903}
2904
2906 MachineOperand &MO = MI.getOperand(OpIdx);
2907 return MO.isReg() &&
2908 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2909}
2910
2912 unsigned OpIdx) {
2913 MachineOperand &MO = MI.getOperand(OpIdx);
2914 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2915}
2916
2918 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2919 Builder.buildFConstant(MI.getOperand(0), C);
2920 MI.eraseFromParent();
2921}
2922
2924 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2925 Builder.buildConstant(MI.getOperand(0), C);
2926 MI.eraseFromParent();
2927}
2928
2930 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2931 Builder.buildConstant(MI.getOperand(0), C);
2932 MI.eraseFromParent();
2933}
2934
2936 ConstantFP *CFP) {
2937 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2938 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
2939 MI.eraseFromParent();
2940}
2941
2943 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2944 Builder.buildUndef(MI.getOperand(0));
2945 MI.eraseFromParent();
2946}
2947
2948bool CombinerHelper::matchSimplifyAddToSub(
2949 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2950 Register LHS = MI.getOperand(1).getReg();
2951 Register RHS = MI.getOperand(2).getReg();
2952 Register &NewLHS = std::get<0>(MatchInfo);
2953 Register &NewRHS = std::get<1>(MatchInfo);
2954
2955 // Helper lambda to check for opportunities for
2956 // ((0-A) + B) -> B - A
2957 // (A + (0-B)) -> A - B
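  // e.g. (illustrative): %a = G_ADD %x, %n with %n = G_SUB 0, %y
  // becomes %a = G_SUB %x, %y.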
2958 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2959 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2960 return false;
2961 NewLHS = MaybeNewLHS;
2962 return true;
2963 };
2964
2965 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2966}
2967
2968bool CombinerHelper::matchCombineInsertVecElts(
2969 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2970 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2971 "Invalid opcode");
2972 Register DstReg = MI.getOperand(0).getReg();
2973 LLT DstTy = MRI.getType(DstReg);
2974 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2975 unsigned NumElts = DstTy.getNumElements();
2976 // If this MI is part of a sequence of insert_vec_elts, then
2977 // don't do the combine in the middle of the sequence.
2978 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2979 TargetOpcode::G_INSERT_VECTOR_ELT)
2980 return false;
2981 MachineInstr *CurrInst = &MI;
2982 MachineInstr *TmpInst;
2983 int64_t IntImm;
2984 Register TmpReg;
2985 MatchInfo.resize(NumElts);
2986 while (mi_match(
2987 CurrInst->getOperand(0).getReg(), MRI,
2988 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2989 if (IntImm >= NumElts || IntImm < 0)
2990 return false;
2991 if (!MatchInfo[IntImm])
2992 MatchInfo[IntImm] = TmpReg;
2993 CurrInst = TmpInst;
2994 }
2995 // Variable index.
2996 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2997 return false;
2998 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2999 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3000 if (!MatchInfo[I - 1].isValid())
3001 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3002 }
3003 return true;
3004 }
3005 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3006 // overwritten, bail out.
3007 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3008 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3009}
3010
3011void CombinerHelper::applyCombineInsertVecElts(
3012 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
3013 Register UndefReg;
3014 auto GetUndef = [&]() {
3015 if (UndefReg)
3016 return UndefReg;
3017 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3018 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3019 return UndefReg;
3020 };
3021 for (unsigned I = 0; I < MatchInfo.size(); ++I) {
3022 if (!MatchInfo[I])
3023 MatchInfo[I] = GetUndef();
3024 }
3025 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3026 MI.eraseFromParent();
3027}
3028
3029void CombinerHelper::applySimplifyAddToSub(
3030 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
3031 Register SubLHS, SubRHS;
3032 std::tie(SubLHS, SubRHS) = MatchInfo;
3033 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3034 MI.eraseFromParent();
3035}
3036
3037bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
3038 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3039 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3040 //
3041 // Creates the new hand + logic instructions (but does not insert them).
3042 //
3043 // On success, MatchInfo is populated with the new instructions. These are
3044 // inserted in applyHoistLogicOpWithSameOpcodeHands.
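  // Illustrative instance (made-up types):
  //   %l:_(s64) = G_SEXT %x:_(s32)
  //   %r:_(s64) = G_SEXT %y:_(s32)
  //   %d:_(s64) = G_AND %l, %r
  // records steps that build
  //   %n:_(s32) = G_AND %x, %y
  //   %d:_(s64) = G_SEXT %n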
3045 unsigned LogicOpcode = MI.getOpcode();
3046 assert(LogicOpcode == TargetOpcode::G_AND ||
3047 LogicOpcode == TargetOpcode::G_OR ||
3048 LogicOpcode == TargetOpcode::G_XOR);
3049 MachineIRBuilder MIB(MI);
3050 Register Dst = MI.getOperand(0).getReg();
3051 Register LHSReg = MI.getOperand(1).getReg();
3052 Register RHSReg = MI.getOperand(2).getReg();
3053
3054 // Don't recompute anything.
3055 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3056 return false;
3057
3058 // Make sure we have (hand x, ...), (hand y, ...)
3059 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3060 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3061 if (!LeftHandInst || !RightHandInst)
3062 return false;
3063 unsigned HandOpcode = LeftHandInst->getOpcode();
3064 if (HandOpcode != RightHandInst->getOpcode())
3065 return false;
3066 if (!LeftHandInst->getOperand(1).isReg() ||
3067 !RightHandInst->getOperand(1).isReg())
3068 return false;
3069
3070 // Make sure the types match up, and if we're doing this post-legalization,
3071 // we end up with legal types.
3072 Register X = LeftHandInst->getOperand(1).getReg();
3073 Register Y = RightHandInst->getOperand(1).getReg();
3074 LLT XTy = MRI.getType(X);
3075 LLT YTy = MRI.getType(Y);
3076 if (!XTy.isValid() || XTy != YTy)
3077 return false;
3078
3079 // Optional extra source register.
3080 Register ExtraHandOpSrcReg;
3081 switch (HandOpcode) {
3082 default:
3083 return false;
3084 case TargetOpcode::G_ANYEXT:
3085 case TargetOpcode::G_SEXT:
3086 case TargetOpcode::G_ZEXT: {
3087 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3088 break;
3089 }
3090 case TargetOpcode::G_AND:
3091 case TargetOpcode::G_ASHR:
3092 case TargetOpcode::G_LSHR:
3093 case TargetOpcode::G_SHL: {
3094 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3095 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3096 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3097 return false;
3098 ExtraHandOpSrcReg = ZOp.getReg();
3099 break;
3100 }
3101 }
3102
3103 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3104 return false;
3105
3106 // Record the steps to build the new instructions.
3107 //
3108 // Steps to build (logic x, y)
3109 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3110 OperandBuildSteps LogicBuildSteps = {
3111 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3112 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3113 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3114 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3115
3116 // Steps to build hand (logic x, y), ...z
3117 OperandBuildSteps HandBuildSteps = {
3118 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3119 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3120 if (ExtraHandOpSrcReg.isValid())
3121 HandBuildSteps.push_back(
3122 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3123 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3124
3125 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3126 return true;
3127}
3128
3129void CombinerHelper::applyBuildInstructionSteps(
3130 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3131 assert(MatchInfo.InstrsToBuild.size() &&
3132 "Expected at least one instr to build?");
3133 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3134 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3135 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3136 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3137 for (auto &OperandFn : InstrToBuild.OperandFns)
3138 OperandFn(Instr);
3139 }
3140 MI.eraseFromParent();
3141}
3142
3143bool CombinerHelper::matchAshrShlToSextInreg(
3144 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3145 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3146 int64_t ShlCst, AshrCst;
3147 Register Src;
3148 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3149 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3150 m_ICstOrSplat(AshrCst))))
3151 return false;
3152 if (ShlCst != AshrCst)
3153 return false;
3154 if (!isLegalOrBeforeLegalizer(
3155 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3156 return false;
3157 MatchInfo = std::make_tuple(Src, ShlCst);
3158 return true;
3159}
3160
3161void CombinerHelper::applyAshShlToSextInreg(
3162 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3163 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3164 Register Src;
3165 int64_t ShiftAmt;
3166 std::tie(Src, ShiftAmt) = MatchInfo;
3167 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3168 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3169 MI.eraseFromParent();
3170}
3171
3172/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
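/// e.g. (illustrative): and(and(x, 0xF0), 0x3C) -> and(x, 0x30), while
/// and(and(x, 0xF0), 0x0F) -> 0 because the masks do not overlap.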
3173bool CombinerHelper::matchOverlappingAnd(
3174 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3175 assert(MI.getOpcode() == TargetOpcode::G_AND);
3176
3177 Register Dst = MI.getOperand(0).getReg();
3178 LLT Ty = MRI.getType(Dst);
3179
3180 Register R;
3181 int64_t C1;
3182 int64_t C2;
3183 if (!mi_match(
3184 Dst, MRI,
3185 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3186 return false;
3187
3188 MatchInfo = [=](MachineIRBuilder &B) {
3189 if (C1 & C2) {
3190 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3191 return;
3192 }
3193 auto Zero = B.buildConstant(Ty, 0);
3194 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3195 };
3196 return true;
3197}
3198
3199bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3200 Register &Replacement) {
3201 // Given
3202 //
3203 // %y:_(sN) = G_SOMETHING
3204 // %x:_(sN) = G_SOMETHING
3205 // %res:_(sN) = G_AND %x, %y
3206 //
3207 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3208 //
3209 // Patterns like this can appear as a result of legalization. E.g.
3210 //
3211 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3212 // %one:_(s32) = G_CONSTANT i32 1
3213 // %and:_(s32) = G_AND %cmp, %one
3214 //
3215 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3216 assert(MI.getOpcode() == TargetOpcode::G_AND);
3217 if (!KB)
3218 return false;
3219
3220 Register AndDst = MI.getOperand(0).getReg();
3221 Register LHS = MI.getOperand(1).getReg();
3222 Register RHS = MI.getOperand(2).getReg();
3223 KnownBits LHSBits = KB->getKnownBits(LHS);
3224 KnownBits RHSBits = KB->getKnownBits(RHS);
3225
3226 // Check that x & Mask == x.
3227 // x & 1 == x, always
3228 // x & 0 == x, only if x is also 0
3229 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
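  // Worked example (illustrative): if KnownBits reports the top 31 bits of the
  // LHS as zero (e.g. a zero-extended G_ICMP result) and the RHS is the
  // constant 1, then LHSBits.Zero | RHSBits.One covers every bit, so the G_AND
  // can be replaced by the LHS.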
3230 //
3231 // Check if we can replace AndDst with the LHS of the G_AND
3232 if (canReplaceReg(AndDst, LHS, MRI) &&
3233 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3234 Replacement = LHS;
3235 return true;
3236 }
3237
3238 // Check if we can replace AndDst with the RHS of the G_AND
3239 if (canReplaceReg(AndDst, RHS, MRI) &&
3240 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3241 Replacement = RHS;
3242 return true;
3243 }
3244
3245 return false;
3246}
3247
3248bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
3249 // Given
3250 //
3251 // %y:_(sN) = G_SOMETHING
3252 // %x:_(sN) = G_SOMETHING
3253 // %res:_(sN) = G_OR %x, %y
3254 //
3255 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3256 assert(MI.getOpcode() == TargetOpcode::G_OR);
3257 if (!KB)
3258 return false;
3259
3260 Register OrDst = MI.getOperand(0).getReg();
3261 Register LHS = MI.getOperand(1).getReg();
3262 Register RHS = MI.getOperand(2).getReg();
3263 KnownBits LHSBits = KB->getKnownBits(LHS);
3264 KnownBits RHSBits = KB->getKnownBits(RHS);
3265
3266 // Check that x | Mask == x.
3267 // x | 0 == x, always
3268 // x | 1 == x, only if x is also 1
3269 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3270 //
3271 // Check if we can replace OrDst with the LHS of the G_OR
3272 if (canReplaceReg(OrDst, LHS, MRI) &&
3273 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3274 Replacement = LHS;
3275 return true;
3276 }
3277
3278 // Check if we can replace OrDst with the RHS of the G_OR
3279 if (canReplaceReg(OrDst, RHS, MRI) &&
3280 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3281 Replacement = RHS;
3282 return true;
3283 }
3284
3285 return false;
3286}
3287
3288bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3289 // If the input is already sign extended, just drop the extension.
3290 Register Src = MI.getOperand(1).getReg();
3291 unsigned ExtBits = MI.getOperand(2).getImm();
3292 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3293 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3294}
3295
3296static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3297 int64_t Cst, bool IsVector, bool IsFP) {
3298 // For i1, Cst will always be -1 regardless of boolean contents.
3299 return (ScalarSizeBits == 1 && Cst == -1) ||
3300 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3301}
3302
3303bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3304 SmallVectorImpl<Register> &RegsToNegate) {
3305 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3306 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3307 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3308 Register XorSrc;
3309 Register CstReg;
3310 // We match xor(src, true) here.
3311 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3312 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3313 return false;
3314
3315 if (!MRI.hasOneNonDBGUse(XorSrc))
3316 return false;
3317
3318 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3319 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3320 // list of tree nodes to visit.
3321 RegsToNegate.push_back(XorSrc);
3322 // Remember whether the comparisons are all integer or all floating point.
3323 bool IsInt = false;
3324 bool IsFP = false;
3325 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3326 Register Reg = RegsToNegate[I];
3327 if (!MRI.hasOneNonDBGUse(Reg))
3328 return false;
3329 MachineInstr *Def = MRI.getVRegDef(Reg);
3330 switch (Def->getOpcode()) {
3331 default:
3332 // Don't match if the tree contains anything other than ANDs, ORs and
3333 // comparisons.
3334 return false;
3335 case TargetOpcode::G_ICMP:
3336 if (IsFP)
3337 return false;
3338 IsInt = true;
3339 // When we apply the combine we will invert the predicate.
3340 break;
3341 case TargetOpcode::G_FCMP:
3342 if (IsInt)
3343 return false;
3344 IsFP = true;
3345 // When we apply the combine we will invert the predicate.
3346 break;
3347 case TargetOpcode::G_AND:
3348 case TargetOpcode::G_OR:
3349 // Implement De Morgan's laws:
3350 // ~(x & y) -> ~x | ~y
3351 // ~(x | y) -> ~x & ~y
3352 // When we apply the combine we will change the opcode and recursively
3353 // negate the operands.
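  // Illustrative instance: negating %t = G_AND (G_ICMP eq ...), (G_ICMP slt ...)
  // turns it into G_OR (G_ICMP ne ...), (G_ICMP sge ...).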
3354 RegsToNegate.push_back(Def->getOperand(1).getReg());
3355 RegsToNegate.push_back(Def->getOperand(2).getReg());
3356 break;
3357 }
3358 }
3359
3360 // Now we know whether the comparisons are integer or floating point, check
3361 // the constant in the xor.
3362 int64_t Cst;
3363 if (Ty.isVector()) {
3364 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3365 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3366 if (!MaybeCst)
3367 return false;
3368 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3369 return false;
3370 } else {
3371 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3372 return false;
3373 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3374 return false;
3375 }
3376
3377 return true;
3378}
3379
3380void CombinerHelper::applyNotCmp(MachineInstr &MI,
3381 SmallVectorImpl<Register> &RegsToNegate) {
3382 for (Register Reg : RegsToNegate) {
3383 MachineInstr *Def = MRI.getVRegDef(Reg);
3384 Observer.changingInstr(*Def);
3385 // For each comparison, invert the opcode. For each AND and OR, change the
3386 // opcode.
3387 switch (Def->getOpcode()) {
3388 default:
3389 llvm_unreachable("Unexpected opcode");
3390 case TargetOpcode::G_ICMP:
3391 case TargetOpcode::G_FCMP: {
3392 MachineOperand &PredOp = Def->getOperand(1);
3393 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3394 (CmpInst::Predicate)PredOp.getPredicate());
3395 PredOp.setPredicate(NewP);
3396 break;
3397 }
3398 case TargetOpcode::G_AND:
3399 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3400 break;
3401 case TargetOpcode::G_OR:
3402 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3403 break;
3404 }
3405 Observer.changedInstr(*Def);
3406 }
3407
3408 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3409 MI.eraseFromParent();
3410}
3411
3412bool CombinerHelper::matchXorOfAndWithSameReg(
3413 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3414 // Match (xor (and x, y), y) (or any of its commuted cases)
3415 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3416 Register &X = MatchInfo.first;
3417 Register &Y = MatchInfo.second;
3418 Register AndReg = MI.getOperand(1).getReg();
3419 Register SharedReg = MI.getOperand(2).getReg();
3420
3421 // Find a G_AND on either side of the G_XOR.
3422 // Look for one of
3423 //
3424 // (xor (and x, y), SharedReg)
3425 // (xor SharedReg, (and x, y))
3426 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3427 std::swap(AndReg, SharedReg);
3428 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3429 return false;
3430 }
3431
3432 // Only do this if we'll eliminate the G_AND.
3433 if (!MRI.hasOneNonDBGUse(AndReg))
3434 return false;
3435
3436 // We can combine if SharedReg is the same as either the LHS or RHS of the
3437 // G_AND.
3438 if (Y != SharedReg)
3439 std::swap(X, Y);
3440 return Y == SharedReg;
3441}
3442
3443void CombinerHelper::applyXorOfAndWithSameReg(
3444 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3445 // Fold (xor (and x, y), y) -> (and (not x), y)
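  // Bit-level check (illustrative): where a bit of y is 0 both sides are 0;
  // where it is 1, ((x & y) ^ y) and ((~x) & y) both give the negation of the
  // corresponding bit of x.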
3446 Register X, Y;
3447 std::tie(X, Y) = MatchInfo;
3448 auto Not = Builder.buildNot(MRI.getType(X), X);
3449 Observer.changingInstr(MI);
3450 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3451 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3452 MI.getOperand(2).setReg(Y);
3453 Observer.changedInstr(MI);
3454}
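// Quick sanity check of the fold with made-up 4-bit values, x = 0b1100 and
// y = 0b1010:
//   and x, y        = 0b1000
//   xor 0b1000, y   = 0b0010
//   not x           = 0b0011
//   and 0b0011, y   = 0b0010   ; same result, and the original G_AND is gone.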
3455
3457 auto &PtrAdd = cast<GPtrAdd>(MI);
3458 Register DstReg = PtrAdd.getReg(0);
3459 LLT Ty = MRI.getType(DstReg);
3461
3462 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3463 return false;
3464
3465 if (Ty.isPointer()) {
3466 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3467 return ConstVal && *ConstVal == 0;
3468 }
3469
3470 assert(Ty.isVector() && "Expecting a vector type");
3471 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3472 return isBuildVectorAllZeros(*VecMI, MRI);
3473}
3474
3476 auto &PtrAdd = cast<GPtrAdd>(MI);
3477 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3478 PtrAdd.eraseFromParent();
3479}
3480
3481/// The second source operand is known to be a power of 2.
3483 Register DstReg = MI.getOperand(0).getReg();
3484 Register Src0 = MI.getOperand(1).getReg();
3485 Register Pow2Src1 = MI.getOperand(2).getReg();
3486 LLT Ty = MRI.getType(DstReg);
3487
3488 // Fold (urem x, pow2) -> (and x, pow2-1)
3489 auto NegOne = Builder.buildConstant(Ty, -1);
3490 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3491 Builder.buildAnd(DstReg, Src0, Add);
3492 MI.eraseFromParent();
3493}
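// Worked example with made-up values, x = 29 and pow2 = 8:
//   29 urem 8                 = 5
//   29 & (8 - 1) = 29 & 0b111 = 5
// The mask is built above as pow2 + (-1), which equals pow2 - 1.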
3494
3496 unsigned &SelectOpNo) {
3497 Register LHS = MI.getOperand(1).getReg();
3498 Register RHS = MI.getOperand(2).getReg();
3499
3500 Register OtherOperandReg = RHS;
3501 SelectOpNo = 1;
3502 MachineInstr *Select = MRI.getVRegDef(LHS);
3503
3504 // Don't do this unless the old select is going away. We want to eliminate the
3505 // binary operator, not replace a binop with a select.
3506 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3507 !MRI.hasOneNonDBGUse(LHS)) {
3508 OtherOperandReg = LHS;
3509 SelectOpNo = 2;
3510 Select = MRI.getVRegDef(RHS);
3511 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3512 !MRI.hasOneNonDBGUse(RHS))
3513 return false;
3514 }
3515
3516 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3517 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3518
3519 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3520 /*AllowFP*/ true,
3521 /*AllowOpaqueConstants*/ false))
3522 return false;
3523 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3524 /*AllowFP*/ true,
3525 /*AllowOpaqueConstants*/ false))
3526 return false;
3527
3528 unsigned BinOpcode = MI.getOpcode();
3529
3530 // We know that one of the operands is a select of constants. Now verify that
3531 // the other binary operator operand is either a constant, or we can handle a
3532 // variable.
3533 bool CanFoldNonConst =
3534 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3535 (isNullOrNullSplat(*SelectLHS, MRI) ||
3536 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3537 (isNullOrNullSplat(*SelectRHS, MRI) ||
3538 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3539 if (CanFoldNonConst)
3540 return true;
3541
3542 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3543 /*AllowFP*/ true,
3544 /*AllowOpaqueConstants*/ false);
3545}
3546
3547/// \p SelectOperand is the operand in binary operator \p MI that is the select
3548/// to fold.
3550 const unsigned &SelectOperand) {
3551 Register Dst = MI.getOperand(0).getReg();
3552 Register LHS = MI.getOperand(1).getReg();
3553 Register RHS = MI.getOperand(2).getReg();
3554 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3555
3556 Register SelectCond = Select->getOperand(1).getReg();
3557 Register SelectTrue = Select->getOperand(2).getReg();
3558 Register SelectFalse = Select->getOperand(3).getReg();
3559
3560 LLT Ty = MRI.getType(Dst);
3561 unsigned BinOpcode = MI.getOpcode();
3562
3563 Register FoldTrue, FoldFalse;
3564
3565 // We have a select-of-constants followed by a binary operator with a
3566 // constant. Eliminate the binop by pulling the constant math into the select.
3567 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3568 if (SelectOperand == 1) {
3569 // TODO: SelectionDAG verifies this actually constant folds before
3570 // committing to the combine.
3571
3572 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3573 FoldFalse =
3574 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3575 } else {
3576 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3577 FoldFalse =
3578 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3579 }
3580
3581 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3582 MI.eraseFromParent();
3583}
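// Example of this fold with hypothetical constants:
//   %s = G_SELECT %cond, 3, 7
//   %d = G_ADD %s, 5
// becomes
//   %t = G_ADD 3, 5       ; expected to constant-fold to 8
//   %f = G_ADD 7, 5       ; expected to constant-fold to 12
//   %d = G_SELECT %cond, %t, %f
// As the TODO above notes, unlike SelectionDAG this does not verify that the
// per-arm operations actually constant fold before committing.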
3584
3585std::optional<SmallVector<Register, 8>>
3586CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3587 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3588 // We want to detect if Root is part of a tree which represents a bunch
3589 // of loads being merged into a larger load. We'll try to recognize patterns
3590 // like, for example:
3591 //
3592 // Reg Reg
3593 // \ /
3594 // OR_1 Reg
3595 // \ /
3596 // OR_2
3597 // \ Reg
3598 // .. /
3599 // Root
3600 //
3601 // Reg Reg Reg Reg
3602 // \ / \ /
3603 // OR_1 OR_2
3604 // \ /
3605 // \ /
3606 // ...
3607 // Root
3608 //
3609 // Each "Reg" may have been produced by a load + some arithmetic. This
3610 // function will save each of them.
3611 SmallVector<Register, 8> RegsToVisit;
3612 SmallVector<const MachineInstr *, 7> Ors = {Root};
3613
3614 // In the "worst" case, we're dealing with a load for each byte. So, there
3615 // are at most #bytes - 1 ORs.
3616 const unsigned MaxIter =
3617 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3618 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3619 if (Ors.empty())
3620 break;
3621 const MachineInstr *Curr = Ors.pop_back_val();
3622 Register OrLHS = Curr->getOperand(1).getReg();
3623 Register OrRHS = Curr->getOperand(2).getReg();
3624
3625 // In the combine, we want to eliminate the entire tree.
3626 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3627 return std::nullopt;
3628
3629 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3630 // something that may be a load + arithmetic.
3631 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3632 Ors.push_back(Or);
3633 else
3634 RegsToVisit.push_back(OrLHS);
3635 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3636 Ors.push_back(Or);
3637 else
3638 RegsToVisit.push_back(OrRHS);
3639 }
3640
3641 // We're going to try and merge each register into a wider power-of-2 type,
3642 // so we ought to have an even number of registers.
3643 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3644 return std::nullopt;
3645 return RegsToVisit;
3646}
3647
3648/// Helper function for findLoadOffsetsForLoadOrCombine.
3649///
3650/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3651/// and then moving that value into a specific byte offset.
3652///
3653/// e.g. x[i] << 24
3654///
3655/// \returns The load instruction and the byte offset it is moved into.
3656static std::optional<std::pair<GZExtLoad *, int64_t>>
3657matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3658 const MachineRegisterInfo &MRI) {
3659 assert(MRI.hasOneNonDBGUse(Reg) &&
3660 "Expected Reg to only have one non-debug use?");
3661 Register MaybeLoad;
3662 int64_t Shift;
3663 if (!mi_match(Reg, MRI,
3664 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3665 Shift = 0;
3666 MaybeLoad = Reg;
3667 }
3668
3669 if (Shift % MemSizeInBits != 0)
3670 return std::nullopt;
3671
3672 // TODO: Handle other types of loads.
3673 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3674 if (!Load)
3675 return std::nullopt;
3676
3677 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3678 return std::nullopt;
3679
3680 return std::make_pair(Load, Shift / MemSizeInBits);
3681}
3682
3683std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3684CombinerHelper::findLoadOffsetsForLoadOrCombine(
3686 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3687
3688 // Each load found for the pattern. There should be one for each RegsToVisit.
3689 SmallSetVector<const MachineInstr *, 8> Loads;
3690
3691 // The lowest index used in any load. (The lowest "i" for each x[i].)
3692 int64_t LowestIdx = INT64_MAX;
3693
3694 // The load which uses the lowest index.
3695 GZExtLoad *LowestIdxLoad = nullptr;
3696
3697 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3698 SmallSet<int64_t, 8> SeenIdx;
3699
3700 // Ensure each load is in the same MBB.
3701 // TODO: Support multiple MachineBasicBlocks.
3702 MachineBasicBlock *MBB = nullptr;
3703 const MachineMemOperand *MMO = nullptr;
3704
3705 // Earliest instruction-order load in the pattern.
3706 GZExtLoad *EarliestLoad = nullptr;
3707
3708 // Latest instruction-order load in the pattern.
3709 GZExtLoad *LatestLoad = nullptr;
3710
3711 // Base pointer which every load should share.
3712 Register BasePtr;
3713
3714 // We want to find a load for each register. Each load should have some
3715 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3716 // track of the load which uses the lowest index. Later, we will check if we
3717 // can use its pointer in the final, combined load.
3718 for (auto Reg : RegsToVisit) {
3719 // Find the load, and find the byte position that its value will end up in
3720 // (e.g. after being shifted into place).
3721 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3722 if (!LoadAndPos)
3723 return std::nullopt;
3724 GZExtLoad *Load;
3725 int64_t DstPos;
3726 std::tie(Load, DstPos) = *LoadAndPos;
3727
3728 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3729 // it is difficult to check for stores/calls/etc between loads.
3730 MachineBasicBlock *LoadMBB = Load->getParent();
3731 if (!MBB)
3732 MBB = LoadMBB;
3733 if (LoadMBB != MBB)
3734 return std::nullopt;
3735
3736 // Make sure that the MachineMemOperands of every seen load are compatible.
3737 auto &LoadMMO = Load->getMMO();
3738 if (!MMO)
3739 MMO = &LoadMMO;
3740 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3741 return std::nullopt;
3742
3743 // Find out what the base pointer and index for the load is.
3744 Register LoadPtr;
3745 int64_t Idx;
3746 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3747 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3748 LoadPtr = Load->getOperand(1).getReg();
3749 Idx = 0;
3750 }
3751
3752 // Don't combine things like a[i], a[i] -> a bigger load.
3753 if (!SeenIdx.insert(Idx).second)
3754 return std::nullopt;
3755
3756 // Every load must share the same base pointer; don't combine things like:
3757 //
3758 // a[i], b[i + 1] -> a bigger load.
3759 if (!BasePtr.isValid())
3760 BasePtr = LoadPtr;
3761 if (BasePtr != LoadPtr)
3762 return std::nullopt;
3763
3764 if (Idx < LowestIdx) {
3765 LowestIdx = Idx;
3766 LowestIdxLoad = Load;
3767 }
3768
3769 // Keep track of the byte offset that this load ends up at. If we have seen
3770 // the byte offset, then stop here. We do not want to combine:
3771 //
3772 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3773 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3774 return std::nullopt;
3775 Loads.insert(Load);
3776
3777 // Keep track of the position of the earliest/latest loads in the pattern.
3778 // We will check that there are no load fold barriers between them later
3779 // on.
3780 //
3781 // FIXME: Is there a better way to check for load fold barriers?
3782 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3783 EarliestLoad = Load;
3784 if (!LatestLoad || dominates(*LatestLoad, *Load))
3785 LatestLoad = Load;
3786 }
3787
3788 // We found a load for each register. Let's check if each load satisfies the
3789 // pattern.
3790 assert(Loads.size() == RegsToVisit.size() &&
3791 "Expected to find a load for each register?");
3792 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3793 LatestLoad && "Expected at least two loads?");
3794
3795 // Check if there are any stores, calls, etc. between any of the loads. If
3796 // there are, then we can't safely perform the combine.
3797 //
3798 // MaxIter is chosen based on the (worst case) number of iterations it
3799 // typically takes to succeed in the LLVM test suite plus some padding.
3800 //
3801 // FIXME: Is there a better way to check for load fold barriers?
3802 const unsigned MaxIter = 20;
3803 unsigned Iter = 0;
3804 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3805 LatestLoad->getIterator())) {
3806 if (Loads.count(&MI))
3807 continue;
3808 if (MI.isLoadFoldBarrier())
3809 return std::nullopt;
3810 if (Iter++ == MaxIter)
3811 return std::nullopt;
3812 }
3813
3814 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3815}
3816
3818 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3819 assert(MI.getOpcode() == TargetOpcode::G_OR);
3820 MachineFunction &MF = *MI.getMF();
3821 // Assuming a little-endian target, transform:
3822 // s8 *a = ...
3823 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3824 // =>
3825 // s32 val = *((i32)a)
3826 //
3827 // s8 *a = ...
3828 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3829 // =>
3830 // s32 val = BSWAP(*((s32)a))
3831 Register Dst = MI.getOperand(0).getReg();
3832 LLT Ty = MRI.getType(Dst);
3833 if (Ty.isVector())
3834 return false;
3835
3836 // We need to combine at least two loads into this type. Since the smallest
3837 // possible load is into a byte, we need at least a 16-bit wide type.
3838 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3839 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3840 return false;
3841
3842 // Match a collection of non-OR instructions in the pattern.
3843 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3844 if (!RegsToVisit)
3845 return false;
3846
3847 // We have a collection of non-OR instructions. Figure out how wide each of
3848 // the small loads should be based on the number of potential loads we
3849 // found.
3850 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3851 if (NarrowMemSizeInBits % 8 != 0)
3852 return false;
3853
3854 // Check if each register feeding into each OR is a load from the same
3855 // base pointer + some arithmetic.
3856 //
3857 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3858 //
3859 // Also verify that each of these ends up putting a[i] into the same memory
3860 // offset as a load into a wide type would.
3861 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
3862 GZExtLoad *LowestIdxLoad, *LatestLoad;
3863 int64_t LowestIdx;
3864 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3865 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3866 if (!MaybeLoadInfo)
3867 return false;
3868 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3869
3870 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3871 // we found before, check if this corresponds to a big or little endian byte
3872 // pattern. If it does, then we can represent it using a load + possibly a
3873 // BSWAP.
3874 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3875 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3876 if (!IsBigEndian)
3877 return false;
3878 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3879 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3880 return false;
3881
3882 // Make sure that the load from the lowest index produces offset 0 in the
3883 // final value.
3884 //
3885 // This ensures that we won't combine something like this:
3886 //
3887 // load x[i] -> byte 2
3888 // load x[i+1] -> byte 0 ---> wide_load x[i]
3889 // load x[i+2] -> byte 1
3890 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3891 const unsigned ZeroByteOffset =
3892 *IsBigEndian
3893 ? bigEndianByteAt(NumLoadsInTy, 0)
3894 : littleEndianByteAt(NumLoadsInTy, 0);
3895 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3896 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3897 ZeroOffsetIdx->second != LowestIdx)
3898 return false;
3899
3900 // We will reuse the pointer from the load which ends up at byte offset 0. It
3901 // may not use index 0.
3902 Register Ptr = LowestIdxLoad->getPointerReg();
3903 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3904 LegalityQuery::MemDesc MMDesc(MMO);
3905 MMDesc.MemoryTy = Ty;
3906 if (!isLegalOrBeforeLegalizer(
3907 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3908 return false;
3909 auto PtrInfo = MMO.getPointerInfo();
3910 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3911
3912 // Load must be allowed and fast on the target.
3913 LLVMContext &C = MF.getFunction().getContext();
3914 auto &DL = MF.getDataLayout();
3915 unsigned Fast = 0;
3916 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3917 !Fast)
3918 return false;
3919
3920 MatchInfo = [=](MachineIRBuilder &MIB) {
3921 MIB.setInstrAndDebugLoc(*LatestLoad);
3922 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3923 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3924 if (NeedsBSwap)
3925 MIB.buildBSwap(Dst, LoadDst);
3926 };
3927 return true;
3928}
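// Worked example of the byte-offset bookkeeping (illustrative values,
// assuming a little-endian target, an s32 result and s8 loads):
//   a[0], a[1] << 8, a[2] << 16, a[3] << 24
//   => MemOffset2Idx = {0:0, 1:1, 2:2, 3:3}, LowestIdx = 0
// This is a little-endian byte pattern, so the whole tree becomes a single
// s32 load with no G_BSWAP. The mirrored pattern (a[0] << 24 | ... | a[3])
// gives {3:0, 2:1, 1:2, 0:3}, a big-endian pattern, so the wide load is
// followed by a G_BSWAP.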
3929
3931 MachineInstr *&ExtMI) {
3932 auto &PHI = cast<GPhi>(MI);
3933 Register DstReg = PHI.getReg(0);
3934
3935 // TODO: Extending a vector may be expensive; don't do this until heuristics
3936 // are better.
3937 if (MRI.getType(DstReg).isVector())
3938 return false;
3939
3940 // Try to match a phi, whose only use is an extend.
3941 if (!MRI.hasOneNonDBGUse(DstReg))
3942 return false;
3943 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3944 switch (ExtMI->getOpcode()) {
3945 case TargetOpcode::G_ANYEXT:
3946 return true; // G_ANYEXT is usually free.
3947 case TargetOpcode::G_ZEXT:
3948 case TargetOpcode::G_SEXT:
3949 break;
3950 default:
3951 return false;
3952 }
3953
3954 // If the target is likely to fold this extend away, don't propagate.
3956 return false;
3957
3958 // We don't want to propagate the extends unless there's a good chance that
3959 // they'll be optimized in some way.
3960 // Collect the unique incoming values.
3962 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3963 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
3964 switch (DefMI->getOpcode()) {
3965 case TargetOpcode::G_LOAD:
3966 case TargetOpcode::G_TRUNC:
3967 case TargetOpcode::G_SEXT:
3968 case TargetOpcode::G_ZEXT:
3969 case TargetOpcode::G_ANYEXT:
3970 case TargetOpcode::G_CONSTANT:
3971 InSrcs.insert(DefMI);
3972 // Don't try to propagate if there are too many places to create new
3973 // extends; chances are it'll increase code size.
3974 if (InSrcs.size() > 2)
3975 return false;
3976 break;
3977 default:
3978 return false;
3979 }
3980 }
3981 return true;
3982}
3983
3985 MachineInstr *&ExtMI) {
3986 auto &PHI = cast<GPhi>(MI);
3987 Register DstReg = ExtMI->getOperand(0).getReg();
3988 LLT ExtTy = MRI.getType(DstReg);
3989
3990 // Propagate the extension into each incoming register's defining block.
3991 // Use a SetVector here because PHIs can have duplicate edges, and we want
3992 // deterministic iteration order.
3993 SmallSetVector<MachineInstr *, 8> SrcMIs;
3994 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
3995 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3996 auto SrcReg = PHI.getIncomingValue(I);
3997 auto *SrcMI = MRI.getVRegDef(SrcReg);
3998 if (!SrcMIs.insert(SrcMI))
3999 continue;
4000
4001 // Build an extend after each src inst.
4002 auto *MBB = SrcMI->getParent();
4003 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4004 if (InsertPt != MBB->end() && InsertPt->isPHI())
4005 InsertPt = MBB->getFirstNonPHI();
4006
4007 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4008 Builder.setDebugLoc(MI.getDebugLoc());
4009 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4010 OldToNewSrcMap[SrcMI] = NewExt;
4011 }
4012
4013 // Create a new phi with the extended inputs.
4015 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4016 NewPhi.addDef(DstReg);
4017 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4018 if (!MO.isReg()) {
4019 NewPhi.addMBB(MO.getMBB());
4020 continue;
4021 }
4022 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4023 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4024 }
4025 Builder.insertInstr(NewPhi);
4026 ExtMI->eraseFromParent();
4027}
4028
4030 Register &Reg) {
4031 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4032 // If we have a constant index, look for a G_BUILD_VECTOR source
4033 // and find the source register that the index maps to.
4034 Register SrcVec = MI.getOperand(1).getReg();
4035 LLT SrcTy = MRI.getType(SrcVec);
4036
4037 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4038 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4039 return false;
4040
4041 unsigned VecIdx = Cst->Value.getZExtValue();
4042
4043 // Check if we have a build_vector or build_vector_trunc with an optional
4044 // trunc in front.
4045 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4046 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4047 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4048 }
4049
4050 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4051 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4052 return false;
4053
4054 EVT Ty(getMVTForLLT(SrcTy));
4055 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4056 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4057 return false;
4058
4059 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4060 return true;
4061}
4062
4064 Register &Reg) {
4065 // Check the type of the register, since it may have come from a
4066 // G_BUILD_VECTOR_TRUNC.
4067 LLT ScalarTy = MRI.getType(Reg);
4068 Register DstReg = MI.getOperand(0).getReg();
4069 LLT DstTy = MRI.getType(DstReg);
4070
4071 if (ScalarTy != DstTy) {
4072 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4073 Builder.buildTrunc(DstReg, Reg);
4074 MI.eraseFromParent();
4075 return;
4076 }
4078}
4079
4082 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4083 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4084 // This combine tries to find build_vectors whose every source element is
4085 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4086 // masked load scalarization are run late in the pipeline. There's already
4087 // a combine for a similar pattern starting from the extract, but that
4088 // doesn't attempt to do it if there are multiple uses of the build_vector,
4089 // as is the case here. Starting the combine from the build_vector
4090 // feels more natural than trying to find sibling nodes of extracts.
4091 // E.g.
4092 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4093 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4094 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4095 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4096 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4097 // ==>
4098 // replace ext{1,2,3,4} with %s{1,2,3,4}
4099
4100 Register DstReg = MI.getOperand(0).getReg();
4101 LLT DstTy = MRI.getType(DstReg);
4102 unsigned NumElts = DstTy.getNumElements();
4103
4104 SmallBitVector ExtractedElts(NumElts);
4105 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4106 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4107 return false;
4108 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4109 if (!Cst)
4110 return false;
4111 unsigned Idx = Cst->getZExtValue();
4112 if (Idx >= NumElts)
4113 return false; // Out of range.
4114 ExtractedElts.set(Idx);
4115 SrcDstPairs.emplace_back(
4116 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4117 }
4118 // Match if every element was extracted.
4119 return ExtractedElts.all();
4120}
4121
4124 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4125 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4126 for (auto &Pair : SrcDstPairs) {
4127 auto *ExtMI = Pair.second;
4128 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4129 ExtMI->eraseFromParent();
4130 }
4131 MI.eraseFromParent();
4132}
4133
4135 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4136 applyBuildFnNoErase(MI, MatchInfo);
4137 MI.eraseFromParent();
4138}
4139
4141 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4142 MatchInfo(Builder);
4143}
4144
4146 BuildFnTy &MatchInfo) {
4147 assert(MI.getOpcode() == TargetOpcode::G_OR);
4148
4149 Register Dst = MI.getOperand(0).getReg();
4150 LLT Ty = MRI.getType(Dst);
4151 unsigned BitWidth = Ty.getScalarSizeInBits();
4152
4153 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4154 unsigned FshOpc = 0;
4155
4156 // Match (or (shl ...), (lshr ...)).
4157 if (!mi_match(Dst, MRI,
4158 // m_GOr() handles the commuted version as well.
4159 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4160 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4161 return false;
4162
4163 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4164 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4165 int64_t CstShlAmt, CstLShrAmt;
4166 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4167 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4168 CstShlAmt + CstLShrAmt == BitWidth) {
4169 FshOpc = TargetOpcode::G_FSHR;
4170 Amt = LShrAmt;
4171
4172 } else if (mi_match(LShrAmt, MRI,
4173 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4174 ShlAmt == Amt) {
4175 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4176 FshOpc = TargetOpcode::G_FSHL;
4177
4178 } else if (mi_match(ShlAmt, MRI,
4179 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4180 LShrAmt == Amt) {
4181 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4182 FshOpc = TargetOpcode::G_FSHR;
4183
4184 } else {
4185 return false;
4186 }
4187
4188 LLT AmtTy = MRI.getType(Amt);
4189 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4190 return false;
4191
4192 MatchInfo = [=](MachineIRBuilder &B) {
4193 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4194 };
4195 return true;
4196}
4197
4198/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4200 unsigned Opc = MI.getOpcode();
4201 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4202 Register X = MI.getOperand(1).getReg();
4203 Register Y = MI.getOperand(2).getReg();
4204 if (X != Y)
4205 return false;
4206 unsigned RotateOpc =
4207 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4208 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4209}
4210
4212 unsigned Opc = MI.getOpcode();
4213 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4214 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4216 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4217 : TargetOpcode::G_ROTR));
4218 MI.removeOperand(2);
4220}
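// Example with a made-up 8-bit value: fshl(x, x, 3) concatenates x with
// itself, shifts left by 3 and keeps the high 8 bits, which is rotl(x, 3):
//   x = 0b10010110  ->  fshl(x, x, 3) = rotl(x, 3) = 0b10110100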
4221
4222// Fold (rot x, c) -> (rot x, c % BitSize)
4224 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4225 MI.getOpcode() == TargetOpcode::G_ROTR);
4226 unsigned Bitsize =
4227 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4228 Register AmtReg = MI.getOperand(2).getReg();
4229 bool OutOfRange = false;
4230 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4231 if (auto *CI = dyn_cast<ConstantInt>(C))
4232 OutOfRange |= CI->getValue().uge(Bitsize);
4233 return true;
4234 };
4235 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4236}
4237
4239 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4240 MI.getOpcode() == TargetOpcode::G_ROTR);
4241 unsigned Bitsize =
4242 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4243 Register Amt = MI.getOperand(2).getReg();
4244 LLT AmtTy = MRI.getType(Amt);
4245 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4246 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4248 MI.getOperand(2).setReg(Amt);
4250}
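// Example (hypothetical): for a 32-bit rotate, (rot x, 37) is rewritten as
// (rot x, 37 urem 32) = (rot x, 5), since rotating by a multiple of the bit
// width is a no-op.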
4251
4253 int64_t &MatchInfo) {
4254 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4255 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4256 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4257 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4258 std::optional<bool> KnownVal;
4259 switch (Pred) {
4260 default:
4261 llvm_unreachable("Unexpected G_ICMP predicate?");
4262 case CmpInst::ICMP_EQ:
4263 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4264 break;
4265 case CmpInst::ICMP_NE:
4266 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4267 break;
4268 case CmpInst::ICMP_SGE:
4269 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4270 break;
4271 case CmpInst::ICMP_SGT:
4272 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4273 break;
4274 case CmpInst::ICMP_SLE:
4275 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4276 break;
4277 case CmpInst::ICMP_SLT:
4278 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4279 break;
4280 case CmpInst::ICMP_UGE:
4281 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4282 break;
4283 case CmpInst::ICMP_UGT:
4284 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4285 break;
4286 case CmpInst::ICMP_ULE:
4287 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4288 break;
4289 case CmpInst::ICMP_ULT:
4290 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4291 break;
4292 }
4293 if (!KnownVal)
4294 return false;
4295 MatchInfo =
4296 *KnownVal
4297 ? getICmpTrueVal(getTargetLowering(),
4298 /*IsVector = */
4299 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4300 /* IsFP = */ false)
4301 : 0;
4302 return true;
4303}
4304
4306 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4307 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4308 // Given:
4309 //
4310 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4311 // %cmp = G_ICMP ne %x, 0
4312 //
4313 // Or:
4314 //
4315 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4316 // %cmp = G_ICMP eq %x, 1
4317 //
4318 // We can replace %cmp with %x assuming true is 1 on the target.
4319 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4320 if (!CmpInst::isEquality(Pred))
4321 return false;
4322 Register Dst = MI.getOperand(0).getReg();
4323 LLT DstTy = MRI.getType(Dst);
4324 if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
4325 /* IsFP = */ false) != 1)
4326 return false;
4327 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4328 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4329 return false;
4330 Register LHS = MI.getOperand(2).getReg();
4331 auto KnownLHS = KB->getKnownBits(LHS);
4332 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4333 return false;
4334 // Make sure replacing Dst with the LHS is a legal operation.
4335 LLT LHSTy = MRI.getType(LHS);
4336 unsigned LHSSize = LHSTy.getSizeInBits();
4337 unsigned DstSize = DstTy.getSizeInBits();
4338 unsigned Op = TargetOpcode::COPY;
4339 if (DstSize != LHSSize)
4340 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4341 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4342 return false;
4343 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4344 return true;
4345}
4346
4347// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4349 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4350 assert(MI.getOpcode() == TargetOpcode::G_AND);
4351
4352 // Ignore vector types to simplify matching the two constants.
4353 // TODO: do this for vectors and scalars via a demanded bits analysis.
4354 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4355 if (Ty.isVector())
4356 return false;
4357
4358 Register Src;
4359 Register AndMaskReg;
4360 int64_t AndMaskBits;
4361 int64_t OrMaskBits;
4362 if (!mi_match(MI, MRI,
4363 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4364 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4365 return false;
4366
4367 // Check if OrMask could turn on any bits in Src.
4368 if (AndMaskBits & OrMaskBits)
4369 return false;
4370
4371 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4373 // Canonicalize the result to have the constant on the RHS.
4374 if (MI.getOperand(1).getReg() == AndMaskReg)
4375 MI.getOperand(2).setReg(AndMaskReg);
4376 MI.getOperand(1).setReg(Src);
4378 };
4379 return true;
4380}
4381
4382/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4384 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4385 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4386 Register Dst = MI.getOperand(0).getReg();
4387 Register Src = MI.getOperand(1).getReg();
4388 LLT Ty = MRI.getType(Src);
4389 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4390 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4391 return false;
4392 int64_t Width = MI.getOperand(2).getImm();
4393 Register ShiftSrc;
4394 int64_t ShiftImm;
4395 if (!mi_match(
4396 Src, MRI,
4397 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4398 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4399 return false;
4400 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4401 return false;
4402
4403 MatchInfo = [=](MachineIRBuilder &B) {
4404 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4405 auto Cst2 = B.buildConstant(ExtractTy, Width);
4406 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4407 };
4408 return true;
4409}
4410
4411/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4413 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4414 assert(MI.getOpcode() == TargetOpcode::G_AND);
4415 Register Dst = MI.getOperand(0).getReg();
4416 LLT Ty = MRI.getType(Dst);
4417 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4418 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4419 return false;
4420
4421 int64_t AndImm, LSBImm;
4422 Register ShiftSrc;
4423 const unsigned Size = Ty.getScalarSizeInBits();
4424 if (!mi_match(MI.getOperand(0).getReg(), MRI,
4425 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4426 m_ICst(AndImm))))
4427 return false;
4428
4429 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4430 auto MaybeMask = static_cast<uint64_t>(AndImm);
4431 if (MaybeMask & (MaybeMask + 1))
4432 return false;
4433
4434 // LSB must fit within the register.
4435 if (static_cast<uint64_t>(LSBImm) >= Size)
4436 return false;
4437
4438 uint64_t Width = APInt(Size, AndImm).countr_one();
4439 MatchInfo = [=](MachineIRBuilder &B) {
4440 auto WidthCst = B.buildConstant(ExtractTy, Width);
4441 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4442 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4443 };
4444 return true;
4445}
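// Worked example with made-up constants on s32:
//   %d = G_AND (G_LSHR %x, 3), 0xFF
// AndImm = 0xFF is a low-bit mask (0xFF & 0x100 == 0), LSBImm = 3 fits in the
// register, and Width = countr_one(0xFF) = 8, so this becomes:
//   %d = G_UBFX %x, 3, 8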
4446
4448 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4449 const unsigned Opcode = MI.getOpcode();
4450 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4451
4452 const Register Dst = MI.getOperand(0).getReg();
4453
4454 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4455 ? TargetOpcode::G_SBFX
4456 : TargetOpcode::G_UBFX;
4457
4458 // Check if the type we would use for the extract is legal
4459 LLT Ty = MRI.getType(Dst);
4460 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4461 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4462 return false;
4463
4464 Register ShlSrc;
4465 int64_t ShrAmt;
4466 int64_t ShlAmt;
4467 const unsigned Size = Ty.getScalarSizeInBits();
4468
4469 // Try to match shr (shl x, c1), c2
4470 if (!mi_match(Dst, MRI,
4471 m_BinOp(Opcode,
4472 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4473 m_ICst(ShrAmt))))
4474 return false;
4475
4476 // Make sure that the shift sizes can fit a bitfield extract
4477 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4478 return false;
4479
4480 // Skip this combine if the G_SEXT_INREG combine could handle it
4481 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4482 return false;
4483
4484 // Calculate start position and width of the extract
4485 const int64_t Pos = ShrAmt - ShlAmt;
4486 const int64_t Width = Size - ShrAmt;
4487
4488 MatchInfo = [=](MachineIRBuilder &B) {
4489 auto WidthCst = B.buildConstant(ExtractTy, Width);
4490 auto PosCst = B.buildConstant(ExtractTy, Pos);
4491 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4492 };
4493 return true;
4494}
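// Worked example with made-up constants on s32:
//   %d = G_LSHR (G_SHL %x, 4), 10
// ShlAmt = 4, ShrAmt = 10, Size = 32, so Pos = 10 - 4 = 6 and
// Width = 32 - 10 = 22:
//   %d = G_UBFX %x, 6, 22
// The same positions feed a G_SBFX when the outer shift is a G_ASHR.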
4495
4497 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4498 const unsigned Opcode = MI.getOpcode();
4499 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4500
4501 const Register Dst = MI.getOperand(0).getReg();
4502 LLT Ty = MRI.getType(Dst);
4503 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4504 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4505 return false;
4506
4507 // Try to match shr (and x, c1), c2
4508 Register AndSrc;
4509 int64_t ShrAmt;
4510 int64_t SMask;
4511 if (!mi_match(Dst, MRI,
4512 m_BinOp(Opcode,
4513 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4514 m_ICst(ShrAmt))))
4515 return false;
4516
4517 const unsigned Size = Ty.getScalarSizeInBits();
4518 if (ShrAmt < 0 || ShrAmt >= Size)
4519 return false;
4520
4521 // If the shift subsumes the mask, emit the 0 directly.
4522 if (0 == (SMask >> ShrAmt)) {
4523 MatchInfo = [=](MachineIRBuilder &B) {
4524 B.buildConstant(Dst, 0);
4525 };
4526 return true;
4527 }
4528
4529 // Check that ubfx can do the extraction, with no holes in the mask.
4530 uint64_t UMask = SMask;
4531 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4532 UMask &= maskTrailingOnes<uint64_t>(Size);
4533 if (!isMask_64(UMask))
4534 return false;
4535
4536 // Calculate start position and width of the extract.
4537 const int64_t Pos = ShrAmt;
4538 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4539
4540 // It's preferable to keep the shift, rather than form G_SBFX.
4541 // TODO: remove the G_AND via demanded bits analysis.
4542 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4543 return false;
4544
4545 MatchInfo = [=](MachineIRBuilder &B) {
4546 auto WidthCst = B.buildConstant(ExtractTy, Width);
4547 auto PosCst = B.buildConstant(ExtractTy, Pos);
4548 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4549 };
4550 return true;
4551}
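// Worked example with made-up constants on s32:
//   %d = G_LSHR (G_AND %x, 0x0FF0), 4
// UMask = 0x0FF0 with the low 4 bits filled in = 0x0FFF, which is a mask, so
// Pos = 4 and Width = countr_one(0x0FFF) - 4 = 12 - 4 = 8:
//   %d = G_UBFX %x, 4, 8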
4552
4553bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4554 MachineInstr &MI) {
4555 auto &PtrAdd = cast<GPtrAdd>(MI);
4556
4557 Register Src1Reg = PtrAdd.getBaseReg();
4558 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4559 if (!Src1Def)
4560 return false;
4561
4562 Register Src2Reg = PtrAdd.getOffsetReg();
4563
4564 if (MRI.hasOneNonDBGUse(Src1Reg))
4565 return false;
4566
4567 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4568 if (!C1)
4569 return false;
4570 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4571 if (!C2)
4572 return false;
4573
4574 const APInt &C1APIntVal = *C1;
4575 const APInt &C2APIntVal = *C2;
4576 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4577
4578 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4579 // This combine may end up running before ptrtoint/inttoptr combines
4580 // manage to eliminate redundant conversions, so try to look through them.
4581 MachineInstr *ConvUseMI = &UseMI;
4582 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4583 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4584 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4585 Register DefReg = ConvUseMI->getOperand(0).getReg();
4586 if (!MRI.hasOneNonDBGUse(DefReg))
4587 break;
4588 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4589 ConvUseOpc = ConvUseMI->getOpcode();
4590 }
4591 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4592 if (!LdStMI)
4593 continue;
4594 // Is x[offset2] already not a legal addressing mode? If so then
4595 // reassociating the constants breaks nothing (we test offset2 because
4596 // that's the one we hope to fold into the load or store).
4597 TargetLoweringBase::AddrMode AM;
4598 AM.HasBaseReg = true;
4599 AM.BaseOffs = C2APIntVal.getSExtValue();
4600 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4601 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4602 PtrAdd.getMF()->getFunction().getContext());
4603 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4604 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4605 AccessTy, AS))
4606 continue;
4607
4608 // Would x[offset1+offset2] still be a legal addressing mode?
4609 AM.BaseOffs = CombinedValue;
4610 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4611 AccessTy, AS))
4612 return true;
4613 }
4614
4615 return false;
4616}
4617
4619 MachineInstr *RHS,
4620 BuildFnTy &MatchInfo) {
4621 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4622 Register Src1Reg = MI.getOperand(1).getReg();
4623 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4624 return false;
4625 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4626 if (!C2)
4627 return false;
4628
4629 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4630 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4631
4632 auto NewBase =
4633 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4635 MI.getOperand(1).setReg(NewBase.getReg(0));
4636 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4638 };
4639 return !reassociationCanBreakAddressingModePattern(MI);
4640}
4641
4643 MachineInstr *LHS,
4644 MachineInstr *RHS,
4645 BuildFnTy &MatchInfo) {
4646 // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
4647 // if and only if (G_PTR_ADD X, C) has one use.
4648 Register LHSBase;
4649 std::optional<ValueAndVReg> LHSCstOff;
4650 if (!mi_match(MI.getBaseReg(), MRI,
4651 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4652 return false;
4653
4654 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4655 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4656 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4657 // before its def. Sink the instruction to just before the outer PTR_ADD to
4658 // ensure this doesn't happen.
4659 LHSPtrAdd->moveBefore(&MI);
4660 Register RHSReg = MI.getOffsetReg();
4661 // Build a new constant: reusing the vreg could cause a type mismatch if it came from an extend/trunc.
4662 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4664 MI.getOperand(2).setReg(NewCst.getReg(0));
4666 Observer.changingInstr(*LHSPtrAdd);
4667 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4668 Observer.changedInstr(*LHSPtrAdd);
4669 };
4670 return !reassociationCanBreakAddressingModePattern(MI);
4671}
4672
4674 MachineInstr *LHS,
4675 MachineInstr *RHS,
4676 BuildFnTy &MatchInfo) {
4677 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4678 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4679 if (!LHSPtrAdd)
4680 return false;
4681
4682 Register Src2Reg = MI.getOperand(2).getReg();
4683 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4684 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4685 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4686 if (!C1)
4687 return false;
4688 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4689 if (!C2)
4690 return false;
4691
4692 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4693 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4695 MI.getOperand(1).setReg(LHSSrc1);
4696 MI.getOperand(2).setReg(NewCst.getReg(0));
4698 };
4699 return !reassociationCanBreakAddressingModePattern(MI);
4700}
4701
4703 BuildFnTy &MatchInfo) {
4704 auto &PtrAdd = cast<GPtrAdd>(MI);
4705 // We're trying to match a few pointer computation patterns here for
4706 // re-association opportunities.
4707 // 1) Isolating a constant operand to be on the RHS, e.g.:
4708 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4709 //
4710 // 2) Folding two constants in each sub-tree as long as such folding
4711 // doesn't break a legal addressing mode.
4712 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4713 //
4714 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4715 // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
4716 // iff (G_PTR_ADD X, C) has one use.
4717 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4718 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4719
4720 // Try to match example 2.
4721 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4722 return true;
4723
4724 // Try to match example 3.
4725 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4726 return true;
4727
4728 // Try to match example 1.
4729 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4730 return true;
4731
4732 return false;
4733}
4735 Register OpLHS, Register OpRHS,
4736 BuildFnTy &MatchInfo) {
4737 LLT OpRHSTy = MRI.getType(OpRHS);
4738 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
4739
4740 if (OpLHSDef->getOpcode() != Opc)
4741 return false;
4742
4743 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
4744 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
4745 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
4746
4747 // If the inner op is (X op C), pull the constant out so it can be folded with
4748 // other constants in the expression tree. Folding is not guaranteed so we
4749 // might have (C1 op C2). In that case do not pull a constant out because it
4750 // won't help and can lead to infinite loops.
4751 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
4752 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
4753 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
4754 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
4755 MatchInfo = [=](MachineIRBuilder &B) {
4756 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
4757 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
4758 };
4759 return true;
4760 }
4761 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
4762 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
4763 // iff (op x, c1) has one use
4764 MatchInfo = [=](MachineIRBuilder &B) {
4765 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
4766 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
4767 };
4768 return true;
4769 }
4770 }
4771
4772 return false;
4773}
4774
4776 BuildFnTy &MatchInfo) {
4777 // We don't check if the reassociation will break a legal addressing mode
4778 // here since pointer arithmetic is handled by G_PTR_ADD.
4779 unsigned Opc = MI.getOpcode();
4780 Register DstReg = MI.getOperand(0).getReg();
4781 Register LHSReg = MI.getOperand(1).getReg();
4782 Register RHSReg = MI.getOperand(2).getReg();
4783
4784 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
4785 return true;
4786 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
4787 return true;
4788 return false;
4789}
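// Example of the two reassociation forms handled above (constants are
// hypothetical):
//   (add (add %x, 5), 7)  -> (add %x, (add 5, 7))   ; constants can now fold
//   (add (add %x, 5), %y) -> (add (add %x, %y), 5)  ; only if profitable and
//                                                   ; the inner add has one use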
4790
4792 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4793 Register SrcOp = MI.getOperand(1).getReg();
4794
4795 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
4796 MatchInfo = *MaybeCst;
4797 return true;
4798 }
4799
4800 return false;
4801}
4802
4804 Register Op1 = MI.getOperand(1).getReg();
4805 Register Op2 = MI.getOperand(2).getReg();
4806 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4807 if (!MaybeCst)
4808 return false;
4809 MatchInfo = *MaybeCst;
4810 return true;
4811}
4812
4814 Register Op1 = MI.getOperand(1).getReg();
4815 Register Op2 = MI.getOperand(2).getReg();
4816 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
4817 if (!MaybeCst)
4818 return false;
4819 MatchInfo =
4820 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
4821 return true;
4822}
4823
4825 ConstantFP *&MatchInfo) {
4826 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
4827 MI.getOpcode() == TargetOpcode::G_FMAD);
4828 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
4829
4830 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
4831 if (!Op3Cst)
4832 return false;
4833
4834 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
4835 if (!Op2Cst)
4836 return false;
4837
4838 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
4839 if (!Op1Cst)
4840 return false;
4841
4842 APFloat Op1F = Op1Cst->getValueAPF();
4843 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
4845 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
4846 return true;
4847}
4848
4850 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4851 // Look for a binop feeding into an AND with a mask:
4852 //
4853 // %add = G_ADD %lhs, %rhs
4854 // %and = G_AND %add, 000...11111111
4855 //
4856 // Check if it's possible to perform the binop at a narrower width and zext
4857 // back to the original width like so:
4858 //
4859 // %narrow_lhs = G_TRUNC %lhs
4860 // %narrow_rhs = G_TRUNC %rhs
4861 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4862 // %new_add = G_ZEXT %narrow_add
4863 // %and = G_AND %new_add, 000...11111111
4864 //
4865 // This can allow later combines to eliminate the G_AND if it turns out
4866 // that the mask is irrelevant.
4867 assert(MI.getOpcode() == TargetOpcode::G_AND);
4868 Register Dst = MI.getOperand(0).getReg();
4869 Register AndLHS = MI.getOperand(1).getReg();
4870 Register AndRHS = MI.getOperand(2).getReg();
4871 LLT WideTy = MRI.getType(Dst);
4872
4873 // If the potential binop has more than one use, then it's possible that one
4874 // of those uses will need its full width.
4875 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
4876 return false;
4877
4878 // Check if the LHS feeding the AND is impacted by the high bits that we're
4879 // masking out.
4880 //
4881 // e.g. for 64-bit x, y:
4882 //
4883 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
4884 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
4885 if (!LHSInst)
4886 return false;
4887 unsigned LHSOpc = LHSInst->getOpcode();
4888 switch (LHSOpc) {
4889 default:
4890 return false;
4891 case TargetOpcode::G_ADD:
4892 case TargetOpcode::G_SUB:
4893 case TargetOpcode::G_MUL:
4894 case TargetOpcode::G_AND:
4895 case TargetOpcode::G_OR:
4896 case TargetOpcode::G_XOR:
4897 break;
4898 }
4899
4900 // Find the mask on the RHS.
4901 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
4902 if (!Cst)
4903 return false;
4904 auto Mask = Cst->Value;
4905 if (!Mask.isMask())
4906 return false;
4907
4908 // No point in combining if there's nothing to truncate.
4909 unsigned NarrowWidth = Mask.countr_one();
4910 if (NarrowWidth == WideTy.getSizeInBits())
4911 return false;
4912 LLT NarrowTy = LLT::scalar(NarrowWidth);
4913
4914 // Check if adding the zext + truncates could be harmful.
4915 auto &MF = *MI.getMF();
4916 const auto &TLI = getTargetLowering();
4917 LLVMContext &Ctx = MF.getFunction().getContext();
4918 auto &DL = MF.getDataLayout();
4919 if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
4920 !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
4921 return false;
4922 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
4923 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
4924 return false;
4925 Register BinOpLHS = LHSInst->getOperand(1).getReg();
4926 Register BinOpRHS = LHSInst->getOperand(2).getReg();
4927 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4928 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
4929 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
4930 auto NarrowBinOp =
4931 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
4932 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
4934 MI.getOperand(1).setReg(Ext.getReg(0));
4936 };
4937 return true;
4938}
4939
4941 unsigned Opc = MI.getOpcode();
4942 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
4943
4944 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
4945 return false;
4946
4947 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4949 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
4950 : TargetOpcode::G_SADDO;
4951 MI.setDesc(Builder.getTII().get(NewOpc));
4952 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
4954 };
4955 return true;
4956}
4957
4959 // (G_*MULO x, 0) -> 0 + no carry out
4960 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
4961 MI.getOpcode() == TargetOpcode::G_SMULO);
4962 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
4963 return false;
4964 Register Dst = MI.getOperand(0).getReg();
4965 Register Carry = MI.getOperand(1).getReg();
4966 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
4967 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
4968 return false;
4969 MatchInfo = [=](MachineIRBuilder &B) {
4970 B.buildConstant(Dst, 0);
4971 B.buildConstant(Carry, 0);
4972 };
4973 return true;
4974}
4975
4977 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
4978 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
4979 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
4980 MI.getOpcode() == TargetOpcode::G_SADDE ||
4981 MI.getOpcode() == TargetOpcode::G_USUBE ||
4982 MI.getOpcode() == TargetOpcode::G_SSUBE);
4983 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
4984 return false;
4985 MatchInfo = [&](MachineIRBuilder &B) {
4986 unsigned NewOpcode;
4987 switch (MI.getOpcode()) {
4988 case TargetOpcode::G_UADDE:
4989 NewOpcode = TargetOpcode::G_UADDO;
4990 break;
4991 case TargetOpcode::G_SADDE:
4992 NewOpcode = TargetOpcode::G_SADDO;
4993 break;
4994 case TargetOpcode::G_USUBE:
4995 NewOpcode = TargetOpcode::G_USUBO;
4996 break;
4997 case TargetOpcode::G_SSUBE:
4998 NewOpcode = TargetOpcode::G_SSUBO;
4999 break;
5000 }
5002 MI.setDesc(B.getTII().get(NewOpcode));
5003 MI.removeOperand(4);
5005 };
5006 return true;
5007}
5008
5010 BuildFnTy &MatchInfo) {
5011 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5012 Register Dst = MI.getOperand(0).getReg();
5013 // (x + y) - z -> x (if y == z)
5014 // (x + y) - z -> y (if x == z)
5015 Register X, Y, Z;
5016 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5017 Register ReplaceReg;
5018 int64_t CstX, CstY;
5019 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5020 mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
5021 ReplaceReg = X;
5022 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5023 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5024 ReplaceReg = Y;
5025 if (ReplaceReg) {
5026 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5027 return true;
5028 }
5029 }
5030
5031 // x - (y + z) -> 0 - y (if x == z)
5032 // x - (y + z) -> 0 - z (if x == y)
5033 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5034 Register ReplaceReg;
5035 int64_t CstX;
5036 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5037 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5038 ReplaceReg = Y;
5039 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5040 mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
5041 ReplaceReg = Z;
5042 if (ReplaceReg) {
5043 MatchInfo = [=](MachineIRBuilder &B) {
5044 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5045 B.buildSub(Dst, Zero, ReplaceReg);
5046 };
5047 return true;
5048 }
5049 }
5050 return false;
5051}
5052
5054 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5055 auto &UDiv = cast<GenericMachineInstr>(MI);
5056 Register Dst = UDiv.getReg(0);
5057 Register LHS = UDiv.getReg(1);
5058 Register RHS = UDiv.getReg(2);
5059 LLT Ty = MRI.getType(Dst);
5060 LLT ScalarTy = Ty.getScalarType();
5061 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5062 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5063 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5064
5065 unsigned KnownLeadingZeros =
5066 KB ? KB->getKnownBits(LHS).countMinLeadingZeros() : 0;
5067 auto &MIB = Builder;
5068
5069 bool UseNPQ = false;
5070 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5071
5072 auto BuildUDIVPattern = [&](const Constant *C) {
5073 auto *CI = cast<ConstantInt>(C);
5074 const APInt &Divisor = CI->getValue();
5075
5076 bool SelNPQ = false;
5077 APInt Magic(Divisor.getBitWidth(), 0);
5078 unsigned PreShift = 0, PostShift = 0;
5079
5080 // Magic algorithm doesn't work for division by 1. We need to emit a select
5081 // at the end.
5082 // TODO: Use undef values for divisor of 1.
5083 if (!Divisor.isOne()) {
5084
5085 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5086 // in the dividend exceed the leading zeros for the divisor.
5087 UnsignedDivisionByConstantInfo magics =
5088 UnsignedDivisionByConstantInfo::get(
5089 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5090
5091 Magic = std::move(magics.Magic);
5092
5093 assert(magics.PreShift < Divisor.getBitWidth() &&
5094 "We shouldn't generate an undefined shift!");
5095 assert(magics.PostShift < Divisor.getBitWidth() &&
5096 "We shouldn't generate an undefined shift!");
5097 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5098 PreShift = magics.PreShift;
5099 PostShift = magics.PostShift;
5100 SelNPQ = magics.IsAdd;
5101 }
5102
5103 PreShifts.push_back(
5104 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5105 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5106 NPQFactors.push_back(
5107 MIB.buildConstant(ScalarTy,
5108 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5109 : APInt::getZero(EltBits))
5110 .getReg(0));
5111 PostShifts.push_back(
5112 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5113 UseNPQ |= SelNPQ;
5114 return true;
5115 };
5116
5117 // Collect the shifts/magic values from each element.
5118 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5119 (void)Matched;
5120 assert(Matched && "Expected unary predicate match to succeed");
5121
5122 Register PreShift, PostShift, MagicFactor, NPQFactor;
5123 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5124 if (RHSDef) {
5125 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5126 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5127 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5128 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5129 } else {
5131 "Non-build_vector operation should have been a scalar");
5132 PreShift = PreShifts[0];
5133 MagicFactor = MagicFactors[0];
5134 PostShift = PostShifts[0];
5135 }
5136
5137 Register Q = LHS;
5138 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5139
5140 // Multiply the numerator (operand 0) by the magic value.
5141 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5142
5143 if (UseNPQ) {
5144 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5145
5146 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5147 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5148 if (Ty.isVector())
5149 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5150 else
5151 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5152
5153 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5154 }
5155
5156 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5157 auto One = MIB.buildConstant(Ty, 1);
5158 auto IsOne = MIB.buildICmp(
5159 CmpInst::Predicate::ICMP_EQ,
5160 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5161 return MIB.buildSelect(Ty, IsOne, LHS, Q);
5162}
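// Illustrative example (not part of the upstream source): assuming a 32-bit
// G_UDIV by the constant 5 and no known leading zeros in the dividend, the
// magic data should be PreShift = 0, Magic = 0xCCCCCCCD, PostShift = 2 with
// no NPQ fixup, so the emitted sequence is roughly:
//
//   %magic:_(s32) = G_CONSTANT i32 0xCCCCCCCD
//   %q:_(s32)     = G_UMULH %x, %magic   ; floor(x * 0xCCCCCCCD / 2^32)
//   %res:_(s32)   = G_LSHR %q, 2         ; == x udiv 5 for every 32-bit x
//
// followed by the G_SELECT above, which returns %x unchanged if the divisor
// happens to be 1.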
5163
5164 bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
5165 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5166 Register Dst = MI.getOperand(0).getReg();
5167 Register RHS = MI.getOperand(2).getReg();
5168 LLT DstTy = MRI.getType(Dst);
5169 auto *RHSDef = MRI.getVRegDef(RHS);
5170 if (!isConstantOrConstantVector(*RHSDef, MRI))
5171 return false;
5172
5173 auto &MF = *MI.getMF();
5174 AttributeList Attr = MF.getFunction().getAttributes();
5175 const auto &TLI = getTargetLowering();
5176 LLVMContext &Ctx = MF.getFunction().getContext();
5177 auto &DL = MF.getDataLayout();
5178 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5179 return false;
5180
5181 // Don't do this for minsize because the instruction sequence is usually
5182 // larger.
5183 if (MF.getFunction().hasMinSize())
5184 return false;
5185
5186 // Don't do this if the types are not going to be legal.
5187 if (LI) {
5188 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5189 return false;
5190 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5191 return false;
5192 if (!isLegalOrBeforeLegalizer(
5193 {TargetOpcode::G_ICMP,
5194 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5195 DstTy}}))
5196 return false;
5197 }
5198
5199 return matchUnaryPredicate(
5200 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5201}
5202
5203 void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
5204 auto *NewMI = buildUDivUsingMul(MI);
5205 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5206}
5207
5208 bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
5209 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5210 Register Dst = MI.getOperand(0).getReg();
5211 Register RHS = MI.getOperand(2).getReg();
5212 LLT DstTy = MRI.getType(Dst);
5213
5214 auto &MF = *MI.getMF();
5215 AttributeList Attr = MF.getFunction().getAttributes();
5216 const auto &TLI = getTargetLowering();
5217 LLVMContext &Ctx = MF.getFunction().getContext();
5218 auto &DL = MF.getDataLayout();
5219 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5220 return false;
5221
5222 // Don't do this for minsize because the instruction sequence is usually
5223 // larger.
5224 if (MF.getFunction().hasMinSize())
5225 return false;
5226
5227 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5228 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5229 return matchUnaryPredicate(
5230 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5231 }
5232
5233 // Don't support the general case for now.
5234 return false;
5235}
5236
5237 void CombinerHelper::applySDivByConst(MachineInstr &MI) {
5238 auto *NewMI = buildSDivUsingMul(MI);
5239 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5240}
5241
5242 MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
5243 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5244 auto &SDiv = cast<GenericMachineInstr>(MI);
5245 Register Dst = SDiv.getReg(0);
5246 Register LHS = SDiv.getReg(1);
5247 Register RHS = SDiv.getReg(2);
5248 LLT Ty = MRI.getType(Dst);
5249 LLT ScalarTy = Ty.getScalarType();
5250 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5251 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5252 auto &MIB = Builder;
5253
5254 bool UseSRA = false;
5255 SmallVector<Register, 16> Shifts, Factors;
5256
5257 auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5258 bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
5259
5260 auto BuildSDIVPattern = [&](const Constant *C) {
5261 // Don't recompute inverses for each splat element.
5262 if (IsSplat && !Factors.empty()) {
5263 Shifts.push_back(Shifts[0]);
5264 Factors.push_back(Factors[0]);
5265 return true;
5266 }
5267
5268 auto *CI = cast<ConstantInt>(C);
5269 APInt Divisor = CI->getValue();
5270 unsigned Shift = Divisor.countr_zero();
5271 if (Shift) {
5272 Divisor.ashrInPlace(Shift);
5273 UseSRA = true;
5274 }
5275
5276 // Calculate the multiplicative inverse modulo BW.
5277 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5278 APInt Factor = Divisor.multiplicativeInverse();
5279 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5280 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5281 return true;
5282 };
5283
5284 // Collect all magic values from the build vector.
5285 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5286 (void)Matched;
5287 assert(Matched && "Expected unary predicate match to succeed");
5288
5289 Register Shift, Factor;
5290 if (Ty.isVector()) {
5291 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5292 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5293 } else {
5294 Shift = Shifts[0];
5295 Factor = Factors[0];
5296 }
5297
5298 Register Res = LHS;
5299
5300 if (UseSRA)
5301 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5302
5303 return MIB.buildMul(Ty, Res, Factor);
5304}
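// Illustrative example (not part of the upstream source): for an 'exact'
// G_SDIV by 6 on s32, BuildSDIVPattern should give Shift = 1 (the trailing
// zeros of 6) and Factor = 0xAAAAAAAB, the multiplicative inverse of 3 modulo
// 2^32 (3 * 0xAAAAAAAB == 1 (mod 2^32)), so the emitted code is roughly:
//
//   %t:_(s32)   = G_ASHR exact %x, 1     ; strip the known power of two
//   %res:_(s32) = G_MUL %t, 0xAAAAAAAB
//
// e.g. x = 36: (36 >> 1) * 0xAAAAAAAB == 18 * 0xAAAAAAAB == 6 (mod 2^32).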
5305
5306 bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) {
5307 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5308 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5309 "Expected SDIV or UDIV");
5310 auto &Div = cast<GenericMachineInstr>(MI);
5311 Register RHS = Div.getReg(2);
5312 auto MatchPow2 = [&](const Constant *C) {
5313 auto *CI = dyn_cast<ConstantInt>(C);
5314 return CI && (CI->getValue().isPowerOf2() ||
5315 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5316 };
5317 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5318}
5319
5320 void CombinerHelper::applySDivByPow2(MachineInstr &MI) {
5321 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5322 auto &SDiv = cast<GenericMachineInstr>(MI);
5323 Register Dst = SDiv.getReg(0);
5324 Register LHS = SDiv.getReg(1);
5325 Register RHS = SDiv.getReg(2);
5326 LLT Ty = MRI.getType(Dst);
5327 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5328 LLT CCVT =
5329 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5330
5331 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5332 // to the following version:
5333 //
5334 // %c1 = G_CTTZ %rhs
5335 // %inexact = G_SUB $bitwidth, %c1
5336 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5337 // %lshr = G_LSHR %sign, %inexact
5338 // %add = G_ADD %lhs, %lshr
5339 // %ashr = G_ASHR %add, %c1
5340 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5341 // %zero = G_CONSTANT $0
5342 // %neg = G_NEG %ashr
5343 // %isneg = G_ICMP SLT %rhs, %zero
5344 // %res = G_SELECT %isneg, %neg, %ashr
5345
5346 unsigned BitWidth = Ty.getScalarSizeInBits();
5347 auto Zero = Builder.buildConstant(Ty, 0);
5348
5349 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5350 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5351 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5352 // Splat the sign bit into the register
5353 auto Sign = Builder.buildAShr(
5354 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5355
5356 // Add (LHS < 0) ? abs2 - 1 : 0;
5357 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5358 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5359 auto AShr = Builder.buildAShr(Ty, Add, C1);
5360
5361 // Special case: (sdiv X, 1) -> X
5362 // Special Case: (sdiv X, -1) -> 0-X
5363 auto One = Builder.buildConstant(Ty, 1);
5364 auto MinusOne = Builder.buildConstant(Ty, -1);
5365 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5366 auto IsMinusOne =
5367 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5368 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5369 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5370
5371 // If divided by a positive value, we're done. Otherwise, the result must be
5372 // negated.
5373 auto Neg = Builder.buildNeg(Ty, AShr);
5374 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5375 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5376 MI.eraseFromParent();
5377}
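// Worked example (illustrative): sdiv -7, 4 on s32. %c1 = cttz(4) = 2,
// %inexact = 30, %sign = -7 ashr 31 = 0xFFFFFFFF, %lshr = 0xFFFFFFFF lshr 30
// = 3, %add = -7 + 3 = -4, %ashr = -4 ashr 2 = -1. The divisor is neither 1
// nor -1 and is positive, so the result is -1, matching round-toward-zero
// signed division.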
5378
5379 void CombinerHelper::applyUDivByPow2(MachineInstr &MI) {
5380 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5381 auto &UDiv = cast<GenericMachineInstr>(MI);
5382 Register Dst = UDiv.getReg(0);
5383 Register LHS = UDiv.getReg(1);
5384 Register RHS = UDiv.getReg(2);
5385 LLT Ty = MRI.getType(Dst);
5386 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5387
5388 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5389 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5390 MI.eraseFromParent();
5391}
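// Example (illustrative): with a power-of-two divisor such as 8, the G_CTTZ
// folds to a constant and udiv %x, 8 simply becomes %x lshr 3.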
5392
5393 bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
5394 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5395 Register RHS = MI.getOperand(2).getReg();
5396 Register Dst = MI.getOperand(0).getReg();
5397 LLT Ty = MRI.getType(Dst);
5398 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5399 auto MatchPow2ExceptOne = [&](const Constant *C) {
5400 if (auto *CI = dyn_cast<ConstantInt>(C))
5401 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5402 return false;
5403 };
5404 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5405 return false;
5406 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
5407}
5408
5409 void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
5410 Register LHS = MI.getOperand(1).getReg();
5411 Register RHS = MI.getOperand(2).getReg();
5412 Register Dst = MI.getOperand(0).getReg();
5413 LLT Ty = MRI.getType(Dst);
5414 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5415 unsigned NumEltBits = Ty.getScalarSizeInBits();
5416
5417 auto LogBase2 = buildLogBase2(RHS, Builder);
5418 auto ShiftAmt =
5419 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5420 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5421 Builder.buildLShr(Dst, LHS, Trunc);
5422 MI.eraseFromParent();
5423}
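// Example (illustrative): on s32, G_UMULH %x, 8 == floor(x * 8 / 2^32)
// == x lshr 29, which is exactly what the sequence above builds:
// LogBase2(8) = 3 and NumEltBits - 3 = 29.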
5424
5425 bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
5426 BuildFnTy &MatchInfo) {
5427 unsigned Opc = MI.getOpcode();
5428 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
5429 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5430 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
5431
5432 Register Dst = MI.getOperand(0).getReg();
5433 Register X = MI.getOperand(1).getReg();
5434 Register Y = MI.getOperand(2).getReg();
5435 LLT Type = MRI.getType(Dst);
5436
5437 // fold (fadd x, fneg(y)) -> (fsub x, y)
5438 // fold (fadd fneg(y), x) -> (fsub x, y)
5439 // G_FADD is commutative so both cases are checked by m_GFAdd
5440 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5441 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
5442 Opc = TargetOpcode::G_FSUB;
5443 }
5444 /// fold (fsub x, fneg(y)) -> (fadd x, y)
5445 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5446 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
5447 Opc = TargetOpcode::G_FADD;
5448 }
5449 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
5450 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
5451 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
5452 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
5453 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5454 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
5455 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
5456 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
5457 // no opcode change
5458 } else
5459 return false;
5460
5461 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5462 Observer.changingInstr(MI);
5463 MI.setDesc(B.getTII().get(Opc));
5464 MI.getOperand(1).setReg(X);
5465 MI.getOperand(2).setReg(Y);
5466 Observer.changedInstr(MI);
5467 };
5468 return true;
5469}
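// Examples (illustrative): G_FADD %x, (G_FNEG %y) -> G_FSUB %x, %y, and
// G_FMUL (G_FNEG %x), (G_FNEG %y) -> G_FMUL %x, %y. The rewrite mutates MI's
// opcode and operands in place, which is why the observer is notified around
// the change.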
5470
5471 bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5472 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5473
5474 Register LHS = MI.getOperand(1).getReg();
5475 MatchInfo = MI.getOperand(2).getReg();
5476 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
5477
5478 const auto LHSCst = Ty.isVector()
5479 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
5480 : getFConstantVRegValWithLookThrough(LHS, MRI);
5481 if (!LHSCst)
5482 return false;
5483
5484 // -0.0 is always allowed
5485 if (LHSCst->Value.isNegZero())
5486 return true;
5487
5488 // +0.0 is only allowed if nsz is set.
5489 if (LHSCst->Value.isPosZero())
5490 return MI.getFlag(MachineInstr::FmNsz);
5491
5492 return false;
5493}
5494
5495 void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5496 Register Dst = MI.getOperand(0).getReg();
5497 Builder.buildFNeg(
5498 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
5499 eraseInst(MI);
5500}
5501
5502/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
5503/// due to global flags or MachineInstr flags.
5504static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
5505 if (MI.getOpcode() != TargetOpcode::G_FMUL)
5506 return false;
5507 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
5508}
5509
5510static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
5511 const MachineRegisterInfo &MRI) {
5512 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
5513 MRI.use_instr_nodbg_end()) >
5514 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
5515 MRI.use_instr_nodbg_end());
5516}
5517
5518 bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
5519 bool &AllowFusionGlobally,
5520 bool &HasFMAD, bool &Aggressive,
5521 bool CanReassociate) {
5522
5523 auto *MF = MI.getMF();
5524 const auto &TLI = *MF->getSubtarget().getTargetLowering();
5525 const TargetOptions &Options = MF->getTarget().Options;
5526 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5527
5528 if (CanReassociate &&
5529 !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
5530 return false;
5531
5532 // Floating-point multiply-add with intermediate rounding.
5533 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
5534 // Floating-point multiply-add without intermediate rounding.
5535 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
5536 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
5537 // No valid opcode, do not combine.
5538 if (!HasFMAD && !HasFMA)
5539 return false;
5540
5541 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
5542 Options.UnsafeFPMath || HasFMAD;
5543 // If the addition is not contractable, do not combine.
5544 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
5545 return false;
5546
5547 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
5548 return true;
5549}
5550
5551 bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
5552 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5553 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5554
5555 bool AllowFusionGlobally, HasFMAD, Aggressive;
5556 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5557 return false;
5558
5559 Register Op1 = MI.getOperand(1).getReg();
5560 Register Op2 = MI.getOperand(2).getReg();
5561 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5562 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5563 unsigned PreferredFusedOpcode =
5564 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5565
5566 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5567 // prefer to fold the multiply with fewer uses.
5568 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5569 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5570 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5571 std::swap(LHS, RHS);
5572 }
5573
5574 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
5575 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5576 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
5577 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5578 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5579 {LHS.MI->getOperand(1).getReg(),
5580 LHS.MI->getOperand(2).getReg(), RHS.Reg});
5581 };
5582 return true;
5583 }
5584
5585 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
5586 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5587 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
5588 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5589 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5590 {RHS.MI->getOperand(1).getReg(),
5591 RHS.MI->getOperand(2).getReg(), LHS.Reg});
5592 };
5593 return true;
5594 }
5595
5596 return false;
5597}
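// Illustrative MIR (assuming contraction is permitted and G_FMA is legal):
//
//   %m:_(s32) = G_FMUL %x, %y
//   %d:_(s32) = G_FADD %m, %z
//
// is replaced, when %m has a single use (or fusion is aggressive), by the
// fused form with a single rounding step:
//
//   %d:_(s32) = G_FMA %x, %y, %z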
5598
5599 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
5600 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5601 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5602
5603 bool AllowFusionGlobally, HasFMAD, Aggressive;
5604 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5605 return false;
5606
5607 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5608 Register Op1 = MI.getOperand(1).getReg();
5609 Register Op2 = MI.getOperand(2).getReg();
5610 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5611 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5612 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5613
5614 unsigned PreferredFusedOpcode =
5615 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5616
5617 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5618 // prefer to fold the multiply with fewer uses.
5619 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5620 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5621 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5622 std::swap(LHS, RHS);
5623 }
5624
5625 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
5626 MachineInstr *FpExtSrc;
5627 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5628 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5629 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5630 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5631 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5632 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5633 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5634 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5635 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
5636 };
5637 return true;
5638 }
5639
5640 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
5641 // Note: Commutes FADD operands.
5642 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5643 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5644 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5645 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5646 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5647 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5648 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5649 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5650 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
5651 };
5652 return true;
5653 }
5654
5655 return false;
5656}
5657
5658 bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
5659 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5660 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5661
5662 bool AllowFusionGlobally, HasFMAD, Aggressive;
5663 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
5664 return false;
5665
5666 Register Op1 = MI.getOperand(1).getReg();
5667 Register Op2 = MI.getOperand(2).getReg();
5668 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5669 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5670 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5671
5672 unsigned PreferredFusedOpcode =
5673 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5674
5675 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5676 // prefer to fold the multiply with fewer uses.
5677 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5678 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5679 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5680 std::swap(LHS, RHS);
5681 }
5682
5683 MachineInstr *FMA = nullptr;
5684 Register Z;
5685 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
5686 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5687 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
5688 TargetOpcode::G_FMUL) &&
5689 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
5690 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
5691 FMA = LHS.MI;
5692 Z = RHS.Reg;
5693 }
5694 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
5695 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5696 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
5697 TargetOpcode::G_FMUL) &&
5698 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
5699 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
5700 Z = LHS.Reg;
5701 FMA = RHS.MI;
5702 }
5703
5704 if (FMA) {
5705 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
5706 Register X = FMA->getOperand(1).getReg();
5707 Register Y = FMA->getOperand(2).getReg();
5708 Register U = FMulMI->getOperand(1).getReg();
5709 Register V = FMulMI->getOperand(2).getReg();
5710
5711 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5712 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
5713 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
5714 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5715 {X, Y, InnerFMA});
5716 };
5717 return true;
5718 }
5719
5720 return false;
5721}
5722
5723 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
5724 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5725 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5726
5727 bool AllowFusionGlobally, HasFMAD, Aggressive;
5728 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5729 return false;
5730
5731 if (!Aggressive)
5732 return false;
5733
5734 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5735 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5736 Register Op1 = MI.getOperand(1).getReg();
5737 Register Op2 = MI.getOperand(2).getReg();
5738 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5739 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5740
5741 unsigned PreferredFusedOpcode =
5742 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5743
5744 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5745 // prefer to fold the multiply with fewer uses.
5746 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5747 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5748 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5749 std::swap(LHS, RHS);
5750 }
5751
5752 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
5753 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
5754 Register Y, MachineIRBuilder &B) {
5755 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
5756 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
5757 Register InnerFMA =
5758 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
5759 .getReg(0);
5760 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5761 {X, Y, InnerFMA});
5762 };
5763
5764 MachineInstr *FMulMI, *FMAMI;
5765 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
5766 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5767 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5768 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
5769 m_GFPExt(m_MInstr(FMulMI))) &&
5770 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5771 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5772 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5773 MatchInfo = [=](MachineIRBuilder &B) {
5774 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5775 FMulMI->getOperand(2).getReg(), RHS.Reg,
5776 LHS.MI->getOperand(1).getReg(),
5777 LHS.MI->getOperand(2).getReg(), B);
5778 };
5779 return true;
5780 }
5781
5782 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
5783 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5784 // FIXME: This turns two single-precision and one double-precision
5785 // operation into two double-precision operations, which might not be
5786 // interesting for all targets, especially GPUs.
5787 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5788 FMAMI->getOpcode() == PreferredFusedOpcode) {
5789 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5790 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5791 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5792 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5793 MatchInfo = [=](MachineIRBuilder &B) {
5794 Register X = FMAMI->getOperand(1).getReg();
5795 Register Y = FMAMI->getOperand(2).getReg();
5796 X = B.buildFPExt(DstType, X).getReg(0);
5797 Y = B.buildFPExt(DstType, Y).getReg(0);
5798 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5799 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
5800 };
5801
5802 return true;
5803 }
5804 }
5805
5806 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
5807 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5808 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5809 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
5810 m_GFPExt(m_MInstr(FMulMI))) &&
5811 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5812 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5813 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5814 MatchInfo = [=](MachineIRBuilder &B) {
5815 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5816 FMulMI->getOperand(2).getReg(), LHS.Reg,
5817 RHS.MI->getOperand(1).getReg(),
5818 RHS.MI->getOperand(2).getReg(), B);
5819 };
5820 return true;
5821 }
5822
5823 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
5824 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5825 // FIXME: This turns two single-precision and one double-precision
5826 // operation into two double-precision operations, which might not be
5827 // interesting for all targets, especially GPUs.
5828 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5829 FMAMI->getOpcode() == PreferredFusedOpcode) {
5830 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5831 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5832 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5833 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5834 MatchInfo = [=](MachineIRBuilder &B) {
5835 Register X = FMAMI->getOperand(1).getReg();
5836 Register Y = FMAMI->getOperand(2).getReg();
5837 X = B.buildFPExt(DstType, X).getReg(0);
5838 Y = B.buildFPExt(DstType, Y).getReg(0);
5839 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5840 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
5841 };
5842 return true;
5843 }
5844 }
5845
5846 return false;
5847}
5848
5849 bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
5850 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5851 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5852
5853 bool AllowFusionGlobally, HasFMAD, Aggressive;
5854 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5855 return false;
5856
5857 Register Op1 = MI.getOperand(1).getReg();
5858 Register Op2 = MI.getOperand(2).getReg();
5859 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5860 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5861 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5862
5863 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
5864 // prefer to fold the multiply with fewer uses.
5865 bool FirstMulHasFewerUses = true;
5866 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5867 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5868 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5869 FirstMulHasFewerUses = false;
5870
5871 unsigned PreferredFusedOpcode =
5872 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5873
5874 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
5875 if (FirstMulHasFewerUses &&
5876 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5877 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
5878 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5879 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
5880 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5881 {LHS.MI->getOperand(1).getReg(),
5882 LHS.MI->getOperand(2).getReg(), NegZ});
5883 };
5884 return true;
5885 }
5886 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
5887 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5888 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
5889 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5890 Register NegY =
5891 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
5892 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5893 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
5894 };
5895 return true;
5896 }
5897
5898 return false;
5899}
5900
5901 bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
5902 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5903 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5904
5905 bool AllowFusionGlobally, HasFMAD, Aggressive;
5906 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5907 return false;
5908
5909 Register LHSReg = MI.getOperand(1).getReg();
5910 Register RHSReg = MI.getOperand(2).getReg();
5911 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5912
5913 unsigned PreferredFusedOpcode =
5914 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5915
5916 MachineInstr *FMulMI;
5917 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
5918 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5919 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
5920 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5921 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5922 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5923 Register NegX =
5924 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5925 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
5926 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5927 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
5928 };
5929 return true;
5930 }
5931
5932 // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
5933 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5934 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
5935 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5936 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5937 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5938 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5939 {FMulMI->getOperand(1).getReg(),
5940 FMulMI->getOperand(2).getReg(), LHSReg});
5941 };
5942 return true;
5943 }
5944
5945 return false;
5946}
5947
5948 bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
5949 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5950 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5951
5952 bool AllowFusionGlobally, HasFMAD, Aggressive;
5953 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5954 return false;
5955
5956 Register LHSReg = MI.getOperand(1).getReg();
5957 Register RHSReg = MI.getOperand(2).getReg();
5958 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5959
5960 unsigned PreferredFusedOpcode =
5961 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5962
5963 MachineInstr *FMulMI;
5964 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
5965 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
5966 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5967 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
5968 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5969 Register FpExtX =
5970 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5971 Register FpExtY =
5972 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
5973 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
5974 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5975 {FpExtX, FpExtY, NegZ});
5976 };
5977 return true;
5978 }
5979
5980 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
5981 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
5982 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5983 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
5984 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5985 Register FpExtY =
5986 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5987 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
5988 Register FpExtZ =
5989 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
5990 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5991 {NegY, FpExtZ, LHSReg});
5992 };
5993 return true;
5994 }
5995
5996 return false;
5997}
5998
5999 bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
6000 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6001 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6002
6003 bool AllowFusionGlobally, HasFMAD, Aggressive;
6004 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6005 return false;
6006
6007 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6008 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6009 Register LHSReg = MI.getOperand(1).getReg();
6010 Register RHSReg = MI.getOperand(2).getReg();
6011
6012 unsigned PreferredFusedOpcode =
6013 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6014
6015 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6016 MachineIRBuilder &B) {
6017 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6018 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6019 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6020 };
6021
6022 MachineInstr *FMulMI;
6023 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6024 // (fneg (fma (fpext x), (fpext y), z))
6025 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6026 // (fneg (fma (fpext x), (fpext y), z))
6027 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6028 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6029 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6030 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6031 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6032 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6033 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6034 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6035 FMulMI->getOperand(2).getReg(), RHSReg, B);
6036 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6037 };
6038 return true;
6039 }
6040
6041 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6042 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6043 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6044 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6045 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6046 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6047 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6048 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6049 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6050 FMulMI->getOperand(2).getReg(), LHSReg, B);
6051 };
6052 return true;
6053 }
6054
6055 return false;
6056}
6057
6058 bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
6059 unsigned &IdxToPropagate) {
6060 bool PropagateNaN;
6061 switch (MI.getOpcode()) {
6062 default:
6063 return false;
6064 case TargetOpcode::G_FMINNUM:
6065 case TargetOpcode::G_FMAXNUM:
6066 PropagateNaN = false;
6067 break;
6068 case TargetOpcode::G_FMINIMUM:
6069 case TargetOpcode::G_FMAXIMUM:
6070 PropagateNaN = true;
6071 break;
6072 }
6073
6074 auto MatchNaN = [&](unsigned Idx) {
6075 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6076 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6077 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6078 return false;
6079 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6080 return true;
6081 };
6082
6083 return MatchNaN(1) || MatchNaN(2);
6084}
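// Note (illustrative): the two families differ only in NaN handling, e.g.
// fminnum(x, NaN) == x while fminimum(x, NaN) == NaN. Hence a constant NaN
// operand lets G_FMINNUM/G_FMAXNUM be replaced by the other operand, whereas
// G_FMINIMUM/G_FMAXIMUM are replaced by the NaN operand itself.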
6085
6086 bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
6087 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6088 Register LHS = MI.getOperand(1).getReg();
6089 Register RHS = MI.getOperand(2).getReg();
6090
6091 // Helper lambda to check for opportunities for
6092 // A + (B - A) -> B
6093 // (B - A) + A -> B
6094 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6095 Register Reg;
6096 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6097 Reg == MaybeSameReg;
6098 };
6099 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6100}
6101
6102 bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
6103 Register &MatchInfo) {
6104 // This combine folds the following patterns:
6105 //
6106 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6107 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6108 // into
6109 // x
6110 // if
6111 // k == sizeof(VecEltTy)/2
6112 // type(x) == type(dst)
6113 //
6114 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6115 // into
6116 // x
6117 // if
6118 // type(x) == type(dst)
6119
6120 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6121 LLT DstEltTy = DstVecTy.getElementType();
6122
6123 Register Lo, Hi;
6124
6125 if (mi_match(
6126 MI, MRI,
6127 m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
6128 MatchInfo = Lo;
6129 return MRI.getType(MatchInfo) == DstVecTy;
6130 }
6131
6132 std::optional<ValueAndVReg> ShiftAmount;
6133 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6134 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6135 if (mi_match(
6136 MI, MRI,
6137 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6138 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6139 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6140 MatchInfo = Lo;
6141 return MRI.getType(MatchInfo) == DstVecTy;
6142 }
6143 }
6144
6145 return false;
6146}
6147
6148 bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
6149 Register &MatchInfo) {
6150 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6151 // if type(x) == type(G_TRUNC)
6152 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6153 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6154 return false;
6155
6156 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6157}
6158
6159 bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
6160 Register &MatchInfo) {
6161 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6162 // y if K == size of vector element type
6163 std::optional<ValueAndVReg> ShiftAmt;
6164 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6165 m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
6166 m_GCst(ShiftAmt))))
6167 return false;
6168
6169 LLT MatchTy = MRI.getType(MatchInfo);
6170 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6171 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6172}
6173
6174unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6175 CmpInst::Predicate Pred, LLT DstTy,
6176 SelectPatternNaNBehaviour VsNaNRetVal) const {
6177 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6178 "Expected a NaN behaviour?");
6179 // Choose an opcode based off of legality or the behaviour when one of the
6180 // LHS/RHS may be NaN.
6181 switch (Pred) {
6182 default:
6183 return 0;
6184 case CmpInst::FCMP_UGT:
6185 case CmpInst::FCMP_UGE:
6186 case CmpInst::FCMP_OGT:
6187 case CmpInst::FCMP_OGE:
6188 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6189 return TargetOpcode::G_FMAXNUM;
6190 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6191 return TargetOpcode::G_FMAXIMUM;
6192 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6193 return TargetOpcode::G_FMAXNUM;
6194 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6195 return TargetOpcode::G_FMAXIMUM;
6196 return 0;
6197 case CmpInst::FCMP_ULT:
6198 case CmpInst::FCMP_ULE:
6199 case CmpInst::FCMP_OLT:
6200 case CmpInst::FCMP_OLE:
6201 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6202 return TargetOpcode::G_FMINNUM;
6203 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6204 return TargetOpcode::G_FMINIMUM;
6205 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6206 return TargetOpcode::G_FMINNUM;
6207 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6208 return 0;
6209 return TargetOpcode::G_FMINIMUM;
6210 }
6211}
6212
6213CombinerHelper::SelectPatternNaNBehaviour
6214CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6215 bool IsOrderedComparison) const {
6216 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6217 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6218 // Completely unsafe.
6219 if (!LHSSafe && !RHSSafe)
6220 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6221 if (LHSSafe && RHSSafe)
6222 return SelectPatternNaNBehaviour::RETURNS_ANY;
6223 // An ordered comparison will return false when given a NaN, so it
6224 // returns the RHS.
6225 if (IsOrderedComparison)
6226 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6227 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6228 // An unordered comparison will return true when given a NaN, so it
6229 // returns the LHS.
6230 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6231 : SelectPatternNaNBehaviour::RETURNS_NAN;
6232}
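// Example (illustrative): if %x may be NaN but %y is known non-NaN, then in
// select (fcmp olt %x, %y), %x, %y the ordered compare is false for a NaN %x,
// so the select returns %y, the non-NaN side (RETURNS_OTHER); that matches
// fminnum semantics. The unordered form of the same compare would instead
// return the NaN side (RETURNS_NAN).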
6233
6234bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6235 Register TrueVal, Register FalseVal,
6236 BuildFnTy &MatchInfo) {
6237 // Match: select (fcmp cond x, y) x, y
6238 // select (fcmp cond x, y) y, x
6239 // And turn it into fminnum/fmaxnum or fminimum/fmaximum based off of the condition.
6240 LLT DstTy = MRI.getType(Dst);
6241 // Bail out early on pointers, since we'll never want to fold to a min/max.
6242 if (DstTy.isPointer())
6243 return false;
6244 // Match a floating point compare with a less-than/greater-than predicate.
6245 // TODO: Allow multiple users of the compare if they are all selects.
6246 CmpInst::Predicate Pred;
6247 Register CmpLHS, CmpRHS;
6248 if (!mi_match(Cond, MRI,
6249 m_OneNonDBGUse(
6250 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6251 CmpInst::isEquality(Pred))
6252 return false;
6253 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6254 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6255 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6256 return false;
6257 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6258 std::swap(CmpLHS, CmpRHS);
6259 Pred = CmpInst::getSwappedPredicate(Pred);
6260 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6261 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6262 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6263 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6264 }
6265 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6266 return false;
6267 // Decide what type of max/min this should be based off of the predicate.
6268 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6269 if (!Opc || !isLegal({Opc, {DstTy}}))
6270 return false;
6271 // Comparisons between signed zero and zero may have different results...
6272 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6273 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6274 // We don't know if a comparison between two 0s will give us a consistent
6275 // result. Be conservative and only proceed if at least one side is
6276 // non-zero.
6277 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6278 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6279 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6280 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6281 return false;
6282 }
6283 }
6284 MatchInfo = [=](MachineIRBuilder &B) {
6285 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6286 };
6287 return true;
6288}
6289
6290 bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
6291 BuildFnTy &MatchInfo) {
6292 // TODO: Handle integer cases.
6293 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6294 // Condition may be fed by a truncated compare.
6295 Register Cond = MI.getOperand(1).getReg();
6296 Register MaybeTrunc;
6297 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6298 Cond = MaybeTrunc;
6299 Register Dst = MI.getOperand(0).getReg();
6300 Register TrueVal = MI.getOperand(2).getReg();
6301 Register FalseVal = MI.getOperand(3).getReg();
6302 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6303}
6304
6305 bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
6306 BuildFnTy &MatchInfo) {
6307 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6308 // (X + Y) == X --> Y == 0
6309 // (X + Y) != X --> Y != 0
6310 // (X - Y) == X --> Y == 0
6311 // (X - Y) != X --> Y != 0
6312 // (X ^ Y) == X --> Y == 0
6313 // (X ^ Y) != X --> Y != 0
6314 Register Dst = MI.getOperand(0).getReg();
6315 CmpInst::Predicate Pred;
6316 Register X, Y, OpLHS, OpRHS;
6317 bool MatchedSub = mi_match(
6318 Dst, MRI,
6319 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6320 if (MatchedSub && X != OpLHS)
6321 return false;
6322 if (!MatchedSub) {
6323 if (!mi_match(Dst, MRI,
6324 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6325 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6326 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6327 return false;
6328 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6329 }
6330 MatchInfo = [=](MachineIRBuilder &B) {
6331 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6332 B.buildICmp(Pred, Dst, Y, Zero);
6333 };
6334 return CmpInst::isEquality(Pred) && Y.isValid();
6335}
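// Example (illustrative): G_ICMP eq (G_ADD %x, %y), %x folds to
// G_ICMP eq %y, 0; add, sub and xor are invertible modulo 2^n, so the
// equality holds exactly when %y is zero.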
6336
6337 bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
6338 Register ShiftReg = MI.getOperand(2).getReg();
6339 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
6340 auto IsShiftTooBig = [&](const Constant *C) {
6341 auto *CI = dyn_cast<ConstantInt>(C);
6342 return CI && CI->uge(ResTy.getScalarSizeInBits());
6343 };
6344 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
6345}
6346
6347 bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
6348 unsigned LHSOpndIdx = 1;
6349 unsigned RHSOpndIdx = 2;
6350 switch (MI.getOpcode()) {
6351 case TargetOpcode::G_UADDO:
6352 case TargetOpcode::G_SADDO:
6353 case TargetOpcode::G_UMULO:
6354 case TargetOpcode::G_SMULO:
6355 LHSOpndIdx = 2;
6356 RHSOpndIdx = 3;
6357 break;
6358 default:
6359 break;
6360 }
6361 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
6362 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
6363 if (!getIConstantVRegVal(LHS, MRI)) {
6364 // Skip commuting if LHS is not a constant. But, LHS may be a
6365 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
6366 // have a constant on the RHS.
6367 if (MRI.getVRegDef(LHS)->getOpcode() !=
6368 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
6369 return false;
6370 }
6371 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
6372 return MRI.getVRegDef(RHS)->getOpcode() !=
6373 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
6374 !getIConstantVRegVal(RHS, MRI);
6375}
6376
6377 bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
6378 Register LHS = MI.getOperand(1).getReg();
6379 Register RHS = MI.getOperand(2).getReg();
6380 std::optional<FPValueAndVReg> ValAndVReg;
6381 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
6382 return false;
6383 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
6384}
6385
6386 void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
6387 Observer.changingInstr(MI);
6388 unsigned LHSOpndIdx = 1;
6389 unsigned RHSOpndIdx = 2;
6390 switch (MI.getOpcode()) {
6391 case TargetOpcode::G_UADDO:
6392 case TargetOpcode::G_SADDO:
6393 case TargetOpcode::G_UMULO:
6394 case TargetOpcode::G_SMULO:
6395 LHSOpndIdx = 2;
6396 RHSOpndIdx = 3;
6397 break;
6398 default:
6399 break;
6400 }
6401 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
6402 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
6403 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
6404 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
6405 Observer.changedInstr(MI);
6406}
6407
6408bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
6409 LLT SrcTy = MRI.getType(Src);
6410 if (SrcTy.isFixedVector())
6411 return isConstantSplatVector(Src, 1, AllowUndefs);
6412 if (SrcTy.isScalar()) {
6413 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6414 return true;
6415 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6416 return IConstant && IConstant->Value == 1;
6417 }
6418 return false; // scalable vector
6419}
6420
6421bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
6422 LLT SrcTy = MRI.getType(Src);
6423 if (SrcTy.isFixedVector())
6424 return isConstantSplatVector(Src, 0, AllowUndefs);
6425 if (SrcTy.isScalar()) {
6426 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6427 return true;
6428 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6429 return IConstant && IConstant->Value == 0;
6430 }
6431 return false; // scalable vector
6432}
6433
6434// Ignores COPYs during conformance checks.
6435// FIXME scalable vectors.
6436bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
6437 bool AllowUndefs) {
6438 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6439 if (!BuildVector)
6440 return false;
6441 unsigned NumSources = BuildVector->getNumSources();
6442
6443 for (unsigned I = 0; I < NumSources; ++I) {
6444 GImplicitDef *ImplicitDef =
6445 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
6446 if (ImplicitDef && AllowUndefs)
6447 continue;
6448 if (ImplicitDef && !AllowUndefs)
6449 return false;
6450 std::optional<ValueAndVReg> IConstant =
6451 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6452 if (IConstant && IConstant->Value == SplatValue)
6453 continue;
6454 return false;
6455 }
6456 return true;
6457}
6458
6459// Ignores COPYs during lookups.
6460// FIXME scalable vectors
6461std::optional<APInt>
6462CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
6463 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6464 if (IConstant)
6465 return IConstant->Value;
6466
6467 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6468 if (!BuildVector)
6469 return std::nullopt;
6470 unsigned NumSources = BuildVector->getNumSources();
6471
6472 std::optional<APInt> Value = std::nullopt;
6473 for (unsigned I = 0; I < NumSources; ++I) {
6474 std::optional<ValueAndVReg> IConstant =
6475 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6476 if (!IConstant)
6477 return std::nullopt;
6478 if (!Value)
6479 Value = IConstant->Value;
6480 else if (*Value != IConstant->Value)
6481 return std::nullopt;
6482 }
6483 return Value;
6484}
6485
6486// FIXME G_SPLAT_VECTOR
6487bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
6488 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6489 if (IConstant)
6490 return true;
6491
6492 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6493 if (!BuildVector)
6494 return false;
6495
6496 unsigned NumSources = BuildVector->getNumSources();
6497 for (unsigned I = 0; I < NumSources; ++I) {
6498 std::optional<ValueAndVReg> IConstant =
6499 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6500 if (!IConstant)
6501 return false;
6502 }
6503 return true;
6504}
6505
6506// TODO: use knownbits to determine zeros
6507bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
6508 BuildFnTy &MatchInfo) {
6509 uint32_t Flags = Select->getFlags();
6510 Register Dest = Select->getReg(0);
6511 Register Cond = Select->getCondReg();
6512 Register True = Select->getTrueReg();
6513 Register False = Select->getFalseReg();
6514 LLT CondTy = MRI.getType(Select->getCondReg());
6515 LLT TrueTy = MRI.getType(Select->getTrueReg());
6516
6517 // We only do this combine for scalar boolean conditions.
6518 if (CondTy != LLT::scalar(1))
6519 return false;
6520
6521 if (TrueTy.isPointer())
6522 return false;
6523
6524 // Both are scalars.
6525 std::optional<ValueAndVReg> TrueOpt =
6526 getIConstantVRegValWithLookThrough(True, MRI);
6527 std::optional<ValueAndVReg> FalseOpt =
6528 getIConstantVRegValWithLookThrough(False, MRI);
6529
6530 if (!TrueOpt || !FalseOpt)
6531 return false;
6532
6533 APInt TrueValue = TrueOpt->Value;
6534 APInt FalseValue = FalseOpt->Value;
6535
6536 // select Cond, 1, 0 --> zext (Cond)
6537 if (TrueValue.isOne() && FalseValue.isZero()) {
6538 MatchInfo = [=](MachineIRBuilder &B) {
6539 B.setInstrAndDebugLoc(*Select);
6540 B.buildZExtOrTrunc(Dest, Cond);
6541 };
6542 return true;
6543 }
6544
6545 // select Cond, -1, 0 --> sext (Cond)
6546 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
6547 MatchInfo = [=](MachineIRBuilder &B) {
6548 B.setInstrAndDebugLoc(*Select);
6549 B.buildSExtOrTrunc(Dest, Cond);
6550 };
6551 return true;
6552 }
6553
6554 // select Cond, 0, 1 --> zext (!Cond)
6555 if (TrueValue.isZero() && FalseValue.isOne()) {
6556 MatchInfo = [=](MachineIRBuilder &B) {
6557 B.setInstrAndDebugLoc(*Select);
6558 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6559 B.buildNot(Inner, Cond);
6560 B.buildZExtOrTrunc(Dest, Inner);
6561 };
6562 return true;
6563 }
6564
6565 // select Cond, 0, -1 --> sext (!Cond)
6566 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
6567 MatchInfo = [=](MachineIRBuilder &B) {
6568 B.setInstrAndDebugLoc(*Select);
6569 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6570 B.buildNot(Inner, Cond);
6571 B.buildSExtOrTrunc(Dest, Inner);
6572 };
6573 return true;
6574 }
6575
6576 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6577 if (TrueValue - 1 == FalseValue) {
6578 MatchInfo = [=](MachineIRBuilder &B) {
6579 B.setInstrAndDebugLoc(*Select);
6580 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6581 B.buildZExtOrTrunc(Inner, Cond);
6582 B.buildAdd(Dest, Inner, False);
6583 };
6584 return true;
6585 }
6586
6587 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6588 if (TrueValue + 1 == FalseValue) {
6589 MatchInfo = [=](MachineIRBuilder &B) {
6590 B.setInstrAndDebugLoc(*Select);
6591 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6592 B.buildSExtOrTrunc(Inner, Cond);
6593 B.buildAdd(Dest, Inner, False);
6594 };
6595 return true;
6596 }
6597
6598 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
6599 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
6600 MatchInfo = [=](MachineIRBuilder &B) {
6601 B.setInstrAndDebugLoc(*Select);
6602 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6603 B.buildZExtOrTrunc(Inner, Cond);
6604 // The shift amount must be scalar.
6605 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
6606 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
6607 B.buildShl(Dest, Inner, ShAmtC, Flags);
6608 };
6609 return true;
6610 }
6611 // select Cond, -1, C --> or (sext Cond), C
6612 if (TrueValue.isAllOnes()) {
6613 MatchInfo = [=](MachineIRBuilder &B) {
6614 B.setInstrAndDebugLoc(*Select);
6615 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6616 B.buildSExtOrTrunc(Inner, Cond);
6617 B.buildOr(Dest, Inner, False, Flags);
6618 };
6619 return true;
6620 }
6621
6622 // select Cond, C, -1 --> or (sext (not Cond)), C
6623 if (FalseValue.isAllOnes()) {
6624 MatchInfo = [=](MachineIRBuilder &B) {
6625 B.setInstrAndDebugLoc(*Select);
6626 Register Not = MRI.createGenericVirtualRegister(CondTy);
6627 B.buildNot(Not, Cond);
6628 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6629 B.buildSExtOrTrunc(Inner, Not);
6630 B.buildOr(Dest, Inner, True, Flags);
6631 };
6632 return true;
6633 }
6634
6635 return false;
6636}
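// Worked examples (illustrative, s1 condition, s32 constants):
//   select %c, 1, 0 -> G_ZEXT %c
//   select %c, 7, 6 -> G_ADD (G_ZEXT %c), 6   ; TrueValue - 1 == FalseValue
//   select %c, 8, 0 -> G_SHL (G_ZEXT %c), 3   ; power-of-two true value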
6637
6638// TODO: use knownbits to determine zeros
6639bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
6640 BuildFnTy &MatchInfo) {
6641 uint32_t Flags = Select->getFlags();
6642 Register DstReg = Select->getReg(0);
6643 Register Cond = Select->getCondReg();
6644 Register True = Select->getTrueReg();
6645 Register False = Select->getFalseReg();
6646 LLT CondTy = MRI.getType(Select->getCondReg());
6647 LLT TrueTy = MRI.getType(Select->getTrueReg());
6648
6649 // Boolean or fixed vector of booleans.
6650 if (CondTy.isScalableVector() ||
6651 (CondTy.isFixedVector() &&
6652 CondTy.getElementType().getScalarSizeInBits() != 1) ||
6653 CondTy.getScalarSizeInBits() != 1)
6654 return false;
6655
6656 if (CondTy != TrueTy)
6657 return false;
6658
6659 // select Cond, Cond, F --> or Cond, F
6660 // select Cond, 1, F --> or Cond, F
6661 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
6662 MatchInfo = [=](MachineIRBuilder &B) {
6663 B.setInstrAndDebugLoc(*Select);
6664 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6665 B.buildZExtOrTrunc(Ext, Cond);
6666 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6667 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
6668 };
6669 return true;
6670 }
6671
6672 // select Cond, T, Cond --> and Cond, T
6673 // select Cond, T, 0 --> and Cond, T
6674 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
6675 MatchInfo = [=](MachineIRBuilder &B) {
6676 B.setInstrAndDebugLoc(*Select);
6677 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6678 B.buildZExtOrTrunc(Ext, Cond);
6679 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6680 B.buildAnd(DstReg, Ext, FreezeTrue);
6681 };
6682 return true;
6683 }
6684
6685 // select Cond, T, 1 --> or (not Cond), T
6686 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
6687 MatchInfo = [=](MachineIRBuilder &B) {
6688 B.setInstrAndDebugLoc(*Select);
6689 // First the not.
6690 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6691 B.buildNot(Inner, Cond);
6692 // Then an ext to match the destination register.
6693 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6694 B.buildZExtOrTrunc(Ext, Inner);
6695 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6696 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
6697 };
6698 return true;
6699 }
6700
6701 // select Cond, 0, F --> and (not Cond), F
6702 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
6703 MatchInfo = [=](MachineIRBuilder &B) {
6704 B.setInstrAndDebugLoc(*Select);
6705 // First the not.
6706 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6707 B.buildNot(Inner, Cond);
6708 // Then an ext to match the destination register.
6709 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6710 B.buildZExtOrTrunc(Ext, Inner);
6711 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6712 B.buildAnd(DstReg, Ext, FreezeFalse);
6713 };
6714 return true;
6715 }
6716
6717 return false;
6718}
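// Example (illustrative): with matching s1 types, select %c, %t, 0 becomes
// roughly G_AND %c, (G_FREEZE %t). The freeze is needed because the select
// never observes %t when %c is false, while the G_AND would otherwise
// propagate poison from %t.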
6719
6720bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
6721 BuildFnTy &MatchInfo) {
6722 Register DstReg = Select->getReg(0);
6723 Register Cond = Select->getCondReg();
6724 Register True = Select->getTrueReg();
6725 Register False = Select->getFalseReg();
6726 LLT DstTy = MRI.getType(DstReg);
6727
6728 if (DstTy.isPointer())
6729 return false;
6730
6731 // We need a G_ICMP on the condition register.
6732 GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI);
6733 if (!Cmp)
6734 return false;
6735
6736 // We want to fold the icmp and replace the select.
6737 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
6738 return false;
6739
6740 CmpInst::Predicate Pred = Cmp->getCond();
6741 // We need a larger or smaller predicate for
6742 // canonicalization.
6743 if (CmpInst::isEquality(Pred))
6744 return false;
6745
6746 Register CmpLHS = Cmp->getLHSReg();
6747 Register CmpRHS = Cmp->getRHSReg();
6748
6749 // We can swap CmpLHS and CmpRHS for higher hitrate.
6750 if (True == CmpRHS && False == CmpLHS) {
6751 std::swap(CmpLHS, CmpRHS);
6752 Pred = CmpInst::getSwappedPredicate(Pred);
6753 }
6754
6755 // (icmp X, Y) ? X : Y -> integer minmax.
6756 // see matchSelectPattern in ValueTracking.
6757 // Legality between G_SELECT and integer minmax can differ.
6758 if (True == CmpLHS && False == CmpRHS) {
6759 switch (Pred) {
6760 case ICmpInst::ICMP_UGT:
6761 case ICmpInst::ICMP_UGE: {
6762 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
6763 return false;
6764 MatchInfo = [=](MachineIRBuilder &B) {
6765 B.buildUMax(DstReg, True, False);
6766 };
6767 return true;
6768 }
6769 case ICmpInst::ICMP_SGT:
6770 case ICmpInst::ICMP_SGE: {
6771 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
6772 return false;
6773 MatchInfo = [=](MachineIRBuilder &B) {
6774 B.buildSMax(DstReg, True, False);
6775 };
6776 return true;
6777 }
6778 case ICmpInst::ICMP_ULT:
6779 case ICmpInst::ICMP_ULE: {
6780 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
6781 return false;
6782 MatchInfo = [=](MachineIRBuilder &B) {
6783 B.buildUMin(DstReg, True, False);
6784 };
6785 return true;
6786 }
6787 case ICmpInst::ICMP_SLT:
6788 case ICmpInst::ICMP_SLE: {
6789 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
6790 return false;
6791 MatchInfo = [=](MachineIRBuilder &B) {
6792 B.buildSMin(DstReg, True, False);
6793 };
6794 return true;
6795 }
6796 default:
6797 return false;
6798 }
6799 }
6800
6801 return false;
6802}
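// --- Editorial aside (not part of the upstream file) ---
// A standalone sketch of the (icmp X, Y) ? X : Y --> min/max equivalence,
// using plain integers and <algorithm> rather than the G_* opcodes; the
// >= / <= predicate variants behave the same as the strict ones checked here:
#include <algorithm>
#include <cassert>
#include <cstdint>
int main() {
  for (int32_t A = -4; A <= 4; ++A) {
    for (int32_t B = -4; B <= 4; ++B) {
      uint32_t UA = static_cast<uint32_t>(A), UB = static_cast<uint32_t>(B);
      assert(((A > B) ? A : B) == std::max(A, B));       // sgt -> smax
      assert(((A < B) ? A : B) == std::min(A, B));       // slt -> smin
      assert(((UA > UB) ? UA : UB) == std::max(UA, UB)); // ugt -> umax
      assert(((UA < UB) ? UA : UB) == std::min(UA, UB)); // ult -> umin
    }
  }
  return 0;
}
// --- End of editorial aside ---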
6803
6804 bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
6805 GSelect *Select = cast<GSelect>(&MI);
6806
6807 if (tryFoldSelectOfConstants(Select, MatchInfo))
6808 return true;
6809
6810 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
6811 return true;
6812
6813 if (tryFoldSelectToIntMinMax(Select, MatchInfo))
6814 return true;
6815
6816 return false;
6817}
6818
6819/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
6820/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
6821/// into a single comparison using range-based reasoning.
6822 /// See InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
6823bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
6824 BuildFnTy &MatchInfo) {
6825 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6826 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6827 Register DstReg = Logic->getReg(0);
6828 Register LHS = Logic->getLHSReg();
6829 Register RHS = Logic->getRHSReg();
6830 unsigned Flags = Logic->getFlags();
6831
6832 // We need a G_ICMP on the LHS register.
6833 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
6834 if (!Cmp1)
6835 return false;
6836
6837 // We need a G_ICMP on the RHS register.
6838 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
6839 if (!Cmp2)
6840 return false;
6841
6842 // We want to fold the icmps.
6843 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
6844 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
6845 return false;
6846
6847 APInt C1;
6848 APInt C2;
6849 std::optional<ValueAndVReg> MaybeC1 =
6850 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
6851 if (!MaybeC1)
6852 return false;
6853 C1 = MaybeC1->Value;
6854
6855 std::optional<ValueAndVReg> MaybeC2 =
6856 getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
6857 if (!MaybeC2)
6858 return false;
6859 C2 = MaybeC2->Value;
6860
6861 Register R1 = Cmp1->getLHSReg();
6862 Register R2 = Cmp2->getLHSReg();
6863 CmpInst::Predicate Pred1 = Cmp1->getCond();
6864 CmpInst::Predicate Pred2 = Cmp2->getCond();
6865 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
6866 LLT CmpOperandTy = MRI.getType(R1);
6867
6868 if (CmpOperandTy.isPointer())
6869 return false;
6870
6871 // We build ands, adds, and constants of type CmpOperandTy.
6872 // They must be legal to build.
6873 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
6874 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
6875 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
6876 return false;
6877
6878 // Look through an add of a constant offset on R1, R2, or both operands.
6879 // This allows us to turn the R + C' < C'' range idiom into a proper range.
6880 std::optional<APInt> Offset1;
6881 std::optional<APInt> Offset2;
6882 if (R1 != R2) {
6883 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
6884 std::optional<ValueAndVReg> MaybeOffset1 =
6885 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6886 if (MaybeOffset1) {
6887 R1 = Add->getLHSReg();
6888 Offset1 = MaybeOffset1->Value;
6889 }
6890 }
6891 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
6892 std::optional<ValueAndVReg> MaybeOffset2 =
6893 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6894 if (MaybeOffset2) {
6895 R2 = Add->getLHSReg();
6896 Offset2 = MaybeOffset2->Value;
6897 }
6898 }
6899 }
6900
6901 if (R1 != R2)
6902 return false;
6903
6904 // We calculate the icmp ranges, taking any offsets into account.
6905 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
6906 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
6907 if (Offset1)
6908 CR1 = CR1.subtract(*Offset1);
6909
6910 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
6911 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
6912 if (Offset2)
6913 CR2 = CR2.subtract(*Offset2);
6914
6915 bool CreateMask = false;
6916 APInt LowerDiff;
6917 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
6918 if (!CR) {
6919 // We need non-wrapping ranges.
6920 if (CR1.isWrappedSet() || CR2.isWrappedSet())
6921 return false;
6922
6923 // Check whether we have equal-size ranges that only differ by one bit.
6924 // In that case we can apply a mask to map one range onto the other.
6925 LowerDiff = CR1.getLower() ^ CR2.getLower();
6926 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
6927 APInt CR1Size = CR1.getUpper() - CR1.getLower();
6928 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
6929 CR1Size != CR2.getUpper() - CR2.getLower())
6930 return false;
6931
6932 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
6933 CreateMask = true;
6934 }
6935
6936 if (IsAnd)
6937 CR = CR->inverse();
6938
6939 CmpInst::Predicate NewPred;
6940 APInt NewC, Offset;
6941 CR->getEquivalentICmp(NewPred, NewC, Offset);
6942
6943 // We take the result type of one of the original icmps, CmpTy, for
6944 // the icmp to be built. The operand type, CmpOperandTy, is used for
6945 // the other instructions and constants to be built. The parameter and
6946 // result types of add and and are the same. CmpTy and the type of
6947 // DstReg might differ. That is why we zext or trunc the icmp into
6948 // the destination register.
6949
6950 MatchInfo = [=](MachineIRBuilder &B) {
6951 if (CreateMask && Offset != 0) {
6952 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6953 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6954 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6955 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
6956 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6957 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6958 B.buildZExtOrTrunc(DstReg, ICmp);
6959 } else if (CreateMask && Offset == 0) {
6960 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6961 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6962 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6963 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
6964 B.buildZExtOrTrunc(DstReg, ICmp);
6965 } else if (!CreateMask && Offset != 0) {
6966 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6967 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
6968 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6969 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6970 B.buildZExtOrTrunc(DstReg, ICmp);
6971 } else if (!CreateMask && Offset == 0) {
6972 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6973 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
6974 B.buildZExtOrTrunc(DstReg, ICmp);
6975 } else {
6976 llvm_unreachable("unexpected configuration of CreateMask and Offset");
6977 }
6978 };
6979 return true;
6980}
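// --- Editorial aside (not part of the upstream file) ---
// A standalone sketch of the range-based folding above: two compares of the
// same value against constants collapse into an add of an offset plus one
// unsigned compare. The constants below are arbitrary illustrations.
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t X = 0; X < 64; ++X) {
    bool TwoCmps = (X >= 5) && (X <= 10); // range [5, 11)
    bool OneCmp = (X - 5u) <= 5u;         // add -5, then a single icmp ule
    assert(TwoCmps == OneCmp);            // wrap-around for X < 5 is intended
  }
  return 0;
}
// --- End of editorial aside ---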
6981
6982bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
6983 BuildFnTy &MatchInfo) {
6984 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6985 Register DestReg = Logic->getReg(0);
6986 Register LHS = Logic->getLHSReg();
6987 Register RHS = Logic->getRHSReg();
6988 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6989
6990 // We need a compare on the LHS register.
6991 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
6992 if (!Cmp1)
6993 return false;
6994
6995 // We need a compare on the RHS register.
6996 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
6997 if (!Cmp2)
6998 return false;
6999
7000 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7001 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7002
7003 // We build one fcmp: it must be legal to build, the fcmps and the logic op
7004 // must each have a single use, and the fcmps must have the same shape.
7005 if (!isLegalOrBeforeLegalizer(
7006 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7007 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7008 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7009 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7010 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7011 return false;
7012
7013 CmpInst::Predicate PredL = Cmp1->getCond();
7014 CmpInst::Predicate PredR = Cmp2->getCond();
7015 Register LHS0 = Cmp1->getLHSReg();
7016 Register LHS1 = Cmp1->getRHSReg();
7017 Register RHS0 = Cmp2->getLHSReg();
7018 Register RHS1 = Cmp2->getRHSReg();
7019
7020 if (LHS0 == RHS1 && LHS1 == RHS0) {
7021 // Swap RHS operands to match LHS.
7022 PredR = CmpInst::getSwappedPredicate(PredR);
7023 std::swap(RHS0, RHS1);
7024 }
7025
7026 if (LHS0 == RHS0 && LHS1 == RHS1) {
7027 // We determine the new predicate.
7028 unsigned CmpCodeL = getFCmpCode(PredL);
7029 unsigned CmpCodeR = getFCmpCode(PredR);
7030 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7031 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7032 MatchInfo = [=](MachineIRBuilder &B) {
7033 // The fcmp predicates fill the lower part of the enum.
7034 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7035 if (Pred == FCmpInst::FCMP_FALSE &&
7036 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7037 auto False = B.buildConstant(CmpTy, 0);
7038 B.buildZExtOrTrunc(DestReg, False);
7039 } else if (Pred == FCmpInst::FCMP_TRUE &&
7040 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7041 auto True =
7042 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7043 CmpTy.isVector() /*isVector*/,
7044 true /*isFP*/));
7045 B.buildZExtOrTrunc(DestReg, True);
7046 } else { // We take the predicate without predicate optimizations.
7047 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7048 B.buildZExtOrTrunc(DestReg, Cmp);
7049 }
7050 };
7051 return true;
7052 }
7053
7054 return false;
7055}
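// --- Editorial aside (not part of the upstream file) ---
// A standalone sketch of folding two fcmps on the same operands into one
// predicate, including the NaN (unordered) behaviour, using plain doubles:
#include <cassert>
#include <cmath>
#include <limits>
int main() {
  const double NaN = std::numeric_limits<double>::quiet_NaN();
  const double Vals[] = {-1.0, 0.0, 2.5, NaN};
  for (double A : Vals) {
    for (double B : Vals) {
      // fcmp olt A, B || fcmp ogt A, B  ==  fcmp one A, B
      assert(((A < B) || (A > B)) ==
             (!std::isnan(A) && !std::isnan(B) && A != B));
      // fcmp oge A, B && fcmp ole A, B  ==  fcmp oeq A, B
      assert(((A >= B) && (A <= B)) == (A == B));
    }
  }
  return 0;
}
// --- End of editorial aside ---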
7056
7057 bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
7058 GAnd *And = cast<GAnd>(&MI);
7059
7060 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7061 return true;
7062
7063 if (tryFoldLogicOfFCmps(And, MatchInfo))
7064 return true;
7065
7066 return false;
7067}
7068
7069 bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
7070 GOr *Or = cast<GOr>(&MI);
7071
7072 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7073 return true;
7074
7075 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7076 return true;
7077
7078 return false;
7079}
7080
7081 bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
7082 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7083
7084 // Addo has no flags
7085 Register Dst = Add->getReg(0);
7086 Register Carry = Add->getReg(1);
7087 Register LHS = Add->getLHSReg();
7088 Register RHS = Add->getRHSReg();
7089 bool IsSigned = Add->isSigned();
7090 LLT DstTy = MRI.getType(Dst);
7091 LLT CarryTy = MRI.getType(Carry);
7092
7093 // Fold addo, if the carry is dead -> add, undef.
7094 if (MRI.use_nodbg_empty(Carry) &&
7095 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7096 MatchInfo = [=](MachineIRBuilder &B) {
7097 B.buildAdd(Dst, LHS, RHS);
7098 B.buildUndef(Carry);
7099 };
7100 return true;
7101 }
7102
7103 // Canonicalize constant to RHS.
7104 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7105 if (IsSigned) {
7106 MatchInfo = [=](MachineIRBuilder &B) {
7107 B.buildSAddo(Dst, Carry, RHS, LHS);
7108 };
7109 return true;
7110 }
7111 // !IsSigned
7112 MatchInfo = [=](MachineIRBuilder &B) {
7113 B.buildUAddo(Dst, Carry, RHS, LHS);
7114 };
7115 return true;
7116 }
7117
7118 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7119 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7120
7121 // Fold addo(c1, c2) -> c3, carry.
7122 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7123 isConstantLegalOrBeforeLegalizer(CarryTy)) {
7124 bool Overflow;
7125 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7126 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7127 MatchInfo = [=](MachineIRBuilder &B) {
7128 B.buildConstant(Dst, Result);
7129 B.buildConstant(Carry, Overflow);
7130 };
7131 return true;
7132 }
7133
7134 // Fold (addo x, 0) -> x, no carry
7135 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7136 MatchInfo = [=](MachineIRBuilder &B) {
7137 B.buildCopy(Dst, LHS);
7138 B.buildConstant(Carry, 0);
7139 };
7140 return true;
7141 }
7142
7143 // Given 2 constant operands whose sum does not overflow:
7144 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7145 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7146 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7147 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7148 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7149 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7150 std::optional<APInt> MaybeAddRHS =
7151 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7152 if (MaybeAddRHS) {
7153 bool Overflow;
7154 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7155 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7156 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7157 if (IsSigned) {
7158 MatchInfo = [=](MachineIRBuilder &B) {
7159 auto ConstRHS = B.buildConstant(DstTy, NewC);
7160 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7161 };
7162 return true;
7163 }
7164 // !IsSigned
7165 MatchInfo = [=](MachineIRBuilder &B) {
7166 auto ConstRHS = B.buildConstant(DstTy, NewC);
7167 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7168 };
7169 return true;
7170 }
7171 }
7172 };
7173
7174 // We try to combine addo to non-overflowing add.
7175 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7176 !isConstantLegalOrBeforeLegalizer(CarryTy))
7177 return false;
7178
7179 // We try to combine uaddo to non-overflowing add.
7180 if (!IsSigned) {
7181 ConstantRange CRLHS =
7182 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
7183 ConstantRange CRRHS =
7184 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
7185
7186 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7187 case ConstantRange::OverflowResult::MayOverflow:
7188 return false;
7189 case ConstantRange::OverflowResult::NeverOverflows: {
7190 MatchInfo = [=](MachineIRBuilder &B) {
7191 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7192 B.buildConstant(Carry, 0);
7193 };
7194 return true;
7195 }
7196 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7197 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7198 MatchInfo = [=](MachineIRBuilder &B) {
7199 B.buildAdd(Dst, LHS, RHS);
7200 B.buildConstant(Carry, 1);
7201 };
7202 return true;
7203 }
7204 }
7205 return false;
7206 }
7207
7208 // We try to combine saddo to non-overflowing add.
7209
7210 // If LHS and RHS each have at least two sign bits, then there is no signed
7211 // overflow.
7212 if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
7213 MatchInfo = [=](MachineIRBuilder &B) {
7214 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7215 B.buildConstant(Carry, 0);
7216 };
7217 return true;
7218 }
7219
7220 ConstantRange CRLHS =
7221 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
7222 ConstantRange CRRHS =
7223 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
7224
7225 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7226 case ConstantRange::OverflowResult::MayOverflow:
7227 return false;
7228 case ConstantRange::OverflowResult::NeverOverflows: {
7229 MatchInfo = [=](MachineIRBuilder &B) {
7230 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7231 B.buildConstant(Carry, 0);
7232 };
7233 return true;
7234 }
7235 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7236 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7237 MatchInfo = [=](MachineIRBuilder &B) {
7238 B.buildAdd(Dst, LHS, RHS);
7239 B.buildConstant(Carry, 1);
7240 };
7241 return true;
7242 }
7243 }
7244
7245 return false;
7246}
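// --- Editorial aside (not part of the upstream file) ---
// A standalone sketch of the "two sign bits => no signed overflow" reasoning
// used above for saddo: 8-bit values with at least two sign bits lie in
// [-64, 63], so their sum always fits in an int8_t and the combine can emit
// a plain add with the nsw flag and a constant-zero carry.
#include <cassert>
#include <cstdint>
int main() {
  for (int L = -64; L <= 63; ++L)
    for (int R = -64; R <= 63; ++R)
      assert(L + R >= INT8_MIN && L + R <= INT8_MAX);
  return 0;
}
// --- End of editorial aside ---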
7247
7248 void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
7249 BuildFnTy &MatchInfo) {
7250 MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
7251 MatchInfo(Builder);
7252 Root->eraseFromParent();
7253}
7254
7255 bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO,
7256 BuildFnTy &MatchInfo) {
7257 GSext *Sext = cast<GSext>(getDefIgnoringCopies(MO.getReg(), MRI));
7258 GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Sext->getSrcReg(), MRI));
7259
7260 Register Dst = Sext->getReg(0);
7261 Register Src = Trunc->getSrcReg();
7262
7263 LLT DstTy = MRI.getType(Dst);
7264 LLT SrcTy = MRI.getType(Src);
7265
7266 if (DstTy == SrcTy) {
7267 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
7268 return true;
7269 }
7270
7271 if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
7272 isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
7273 MatchInfo = [=](MachineIRBuilder &B) {
7274 B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoSWrap);
7275 };
7276 return true;
7277 }
7278
7279 if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
7280 isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT, {DstTy, SrcTy}})) {
7281 MatchInfo = [=](MachineIRBuilder &B) { B.buildSExt(Dst, Src); };
7282 return true;
7283 }
7284
7285 return false;
7286}
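// --- Editorial aside (not part of the upstream file) ---
// A standalone sketch of the sext(trunc(x)) collapse above, under the
// assumption that the truncation is value-preserving, which is what the
// NoSWrap flag on the emitted trunc asserts:
#include <cassert>
#include <cstdint>
int main() {
  for (int32_t X = -128; X < 128; ++X) {
    int8_t T = static_cast<int8_t>(X);    // lossless G_TRUNC for this range
    assert(static_cast<int32_t>(T) == X); // sext back to s32 acts as a copy
    assert(static_cast<int64_t>(T) == X); // sext to wider s64 == sext of x
    assert(static_cast<int16_t>(T) ==
           static_cast<int16_t>(X));      // narrower dst == trunc of x
  }
  return 0;
}
// --- End of editorial aside ---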
7287
7288 bool CombinerHelper::matchZextOfTrunc(const MachineOperand &MO,
7289 BuildFnTy &MatchInfo) {
7290 GZext *Zext = cast<GZext>(getDefIgnoringCopies(MO.getReg(), MRI));
7291 GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Zext->getSrcReg(), MRI));
7292
7293 Register Dst = Zext->getReg(0);
7294 Register Src = Trunc->getSrcReg();
7295
7296 LLT DstTy = MRI.getType(Dst);
7297 LLT SrcTy = MRI.getType(Src);
7298
7299 if (DstTy == SrcTy) {
7300 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
7301 return true;
7302 }
7303
7304 if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
7305 isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
7306 MatchInfo = [=](MachineIRBuilder &B) {
7307 B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoUWrap);
7308 };
7309 return true;
7310 }
7311
7312 if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
7313 isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}})) {
7314 MatchInfo = [=](MachineIRBuilder &B) {
7315 B.buildZExt(Dst, Src, MachineInstr::MIFlag::NonNeg);
7316 };
7317 return true;
7318 }
7319
7320 return false;
7321}
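// --- Editorial aside (not part of the upstream file) ---
// The zext(trunc(x)) counterpart: assuming the truncated value fits in the
// narrow type (the NoUWrap / NonNeg flags in the code above express this),
// the pair collapses to a copy, a narrower trunc, or a wider zext:
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t X = 0; X < 256; ++X) {
    uint8_t T = static_cast<uint8_t>(X);   // lossless G_TRUNC for this range
    assert(static_cast<uint32_t>(T) == X); // zext back to s32 acts as a copy
    assert(static_cast<uint64_t>(T) == X); // zext to wider s64 == zext of x
    assert(static_cast<uint16_t>(T) ==
           static_cast<uint16_t>(X));      // narrower dst == trunc of x
  }
  return 0;
}
// --- End of editorial aside ---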
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static Type * getTypeForLLT(LLT Ty, LLVMContext &C)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1096
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1446
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
int32_t exactLogBase2() const
Definition: APInt.h:1732
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:812
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1596
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1555
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
unsigned countl_one() const
Count the number of leading one bits.
Definition: APInt.h:1572
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1520
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1613
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition: InstrTypes.h:1255
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:1010
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:1023
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:1008
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:997
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:1017
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:1005
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:1000
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:1021
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:1019
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:1006
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:995
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1167
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
static bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyUDivByConst(MachineInstr &MI)
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops)
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
bool matchPtrAddZero(MachineInstr &MI)
}
bool matchAllExplicitUsesAreUndef(MachineInstr &MI)
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx)
Delete MI and replace all of its uses with its OpIdx-th operand.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUDivByConst(MachineInstr &MI)
Combine G_UDIV by constant into a multiply by magic constant.
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI)
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchShiftsTooBig(MachineInstr &MI)
Match shifts greater or equal to the bitwidth of the operation.
bool tryCombineCopy(MachineInstr &MI)
If MI is COPY, try to combine it.
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
bool matchUndefStore(MachineInstr &MI)
Return true if a G_STORE instruction MI is storing an undef value.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchRedundantSExtInReg(MachineInstr &MI)
bool matchSextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine sext of trunc.
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo)
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo)
Do constant FP folding when opportunities are exposed after MIR building.
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI)
void applyCommuteBinOpOperands(MachineInstr &MI)
bool matchBinOpSameVal(MachineInstr &MI)
Optimize (x op x) -> x.
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineCopy(MachineInstr &MI)
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx)
Return true if a G_SELECT instruction MI has a constant comparison.
void eraseInst(MachineInstr &MI)
Erase MI.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchAddSubSameReg(MachineInstr &MI, Register &Src)
Transform G_ADD(x, G_SUB(y, x)) to y.
void applyRotateOutOfRange(MachineInstr &MI)
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchRotateOutOfRange(MachineInstr &MI)
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops)
Replace MI with a concat_vectors with Ops.
const TargetLowering & getTargetLowering() const
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
void applyPtrAddZero(MachineInstr &MI)
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
void setRegBank(Register Reg, const RegisterBank *RegBank)
Set the register bank of Reg.
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement)
void replaceInstWithConstant(MachineInstr &MI, int64_t C)
Replace an instruction with a G_CONSTANT with value C.
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
Match ashr (shl x, C), C -> sext_inreg (C)
bool tryCombineExtendingLoads(MachineInstr &MI)
If MI is extend that consumes the result of a load, try to combine it.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applySDivByConst(MachineInstr &MI)
bool matchUndefSelectCmp(MachineInstr &MI)
Return true if a G_SELECT instruction MI has an undef comparison.
void replaceInstWithUndef(MachineInstr &MI)
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantOr(MachineInstr &MI, Register &Replacement)
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is undef.
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void replaceInstWithFConstant(MachineInstr &MI, double C)
Replace an instruction with a G_FCONSTANT with value C.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2)
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
Fold (shift (shift base, x), y) -> (shift base (x+y))
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*MULO x, 0) -> 0 + no carry out.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement)
Delete MI and replace all of its uses with Replacement.
bool matchFunnelShiftToRotate(MachineInstr &MI)
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
Combine inverting a result of a compare into the opposite cond code.
void applyCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is known to be a power of 2.
void applyCombineCopy(MachineInstr &MI)
void applyCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
bool matchAnyExplicitUseIsUndef(MachineInstr &MI)
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
bool matchSextTruncSextLoad(MachineInstr &MI)
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
GISelKnownBits * KB
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
MachineInstr * buildSDivUsingMul(MachineInstr &MI)
Given an G_SDIV MI expressing a signed divide by constant, return an expression that implements it by...
void applySDivByPow2(MachineInstr &MI)
void applyFunnelShiftConstantModulo(MachineInstr &MI)
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool isPreLegalize() const
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo)
Match (and (load x), mask) -> zextload x.
bool matchConstantOp(const MachineOperand &MOP, int64_t C)
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ands.
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg)
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool matchConstantFPOp(const MachineOperand &MOP, double C)
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
Return true if MI is a G_ADD which can be simplified to a G_SUB.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Optimize memcpy intrinsics et al, e.g.
bool matchSelectSameVal(MachineInstr &MI)
Optimize (cond ? x : x) -> x.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst)
Transform fp_instr(cst) to constant result of the fp operation.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo)
Try to reassociate to reassociate operands of a commutative binop.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool tryEmitMemcpyInline(MachineInstr &MI)
Emit loads and stores that perform the given memcpy.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info)
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData)
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo)
Constant fold G_FMA/G_FMAD.
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
bool isLegal(const LegalityQuery &Query) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine selects.
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo)
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg)
Transform anyext(trunc(x)) to x.
void applySimplifyURemByPow2(MachineInstr &MI)
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
MachineRegisterInfo & MRI
void applyUMulHToLShr(MachineInstr &MI)
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo)
Match expression trees of the form.
bool matchShuffleToExtract(MachineInstr &MI)
bool matchUndefShuffleVectorMask(MachineInstr &MI)
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
Transform a multiply by a power-of-2 value to a left shift.
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo)
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo)
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo)
Fold away a merge of an unmerge of the corresponding values.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI)
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx)
Checks if constant at ConstIdx is larger than MI 's bitwidth.
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
bool matchCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchDivByPow2(MachineInstr &MI, bool IsSigned)
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchUMulHToLShr(MachineInstr &MI)
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI dominates UseMI.
MachineInstr * buildUDivUsingMul(MachineInstr &MI)
Given an G_UDIV MI expressing a divide by constant, return an expression that implements it by multip...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg)
Transform zext(trunc(x)) to x.
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData)
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false)
const LegalizerInfo * LI
bool matchZextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine zext of trunc.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
void applyShuffleToExtract(MachineInstr &MI)
MachineDominatorTree * MDT
bool matchSDivByConst(MachineInstr &MI)
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
const RegisterBankInfo * RBI
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
const TargetRegisterInfo * TRI
bool tryCombineShuffleVector(MachineInstr &MI)
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg)
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo)
GISelChangeObserver & Observer
bool matchCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Transform [asz]ext([asz]ext(x)) to [asz]ext x.
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Match sext_inreg(load p), imm -> sextload p.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ors.
void applyFunnelShiftToRotate(MachineInstr &MI)
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine addos.
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg)
Transform PtrToInt(IntToPtr(x)) to x.
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal)
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchCommuteConstantToRHS(MachineInstr &MI)
Match constant LHS ops that should be commuted.
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Replace MI with a series of instructions described in MatchInfo.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
MachineIRBuilder & Builder
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (and x, n), k -> ubfx x, pos, width.
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate commutative binary operations like G_ADD.
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo)
Push a binary operator through a select on constants.
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is zero.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyUDivByPow2(MachineInstr &MI)
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
void applySextTruncSextLoad(MachineInstr &MI)
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
bool matchCommuteFPConstantToRHS(MachineInstr &MI)
Match constant LHS FP ops that should be commuted.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValue() const
Definition: Constants.h:312
const APFloat & getValueAPF() const
Definition: Constants.h:311
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This class represents a range of values.
Definition: ConstantRange.h:47
std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isBigEndian() const
Definition: DataLayout.h:239
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Register getSrcReg() const
Represent a G_FCMP.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
void finishedChangingAllUsesOfReg()
All instructions reported as changing by changingAllUsesOfReg() have finished being changed.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg)
All the instructions using the given register are being changed.
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
KnownBits getKnownBits(Register R)
APInt getKnownZeroes(Register R)
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents a G_IMPLICIT_DEF.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents a sext.
Represents a trunc.
Represents a G_ZEXTLOAD.
Represents a zext.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:182
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition: LowLevelType.h:178
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
bool isLegalOrCustom(const LegalityQuery &Query) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
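When a combine narrows a memory access it allocates a fresh MachineMemOperand through this hook. A sketch under assumed names (MF, OldMMO, and NarrowTy are placeholders, not values from this file):
  // Derive a narrower memory operand that keeps the original pointer info,
  // flags, and alignment.
  MachineMemOperand *NewMMO = MF.getMachineMemOperand(
      OldMMO.getPointerInfo(), OldMMO.getFlags(), NarrowTy, OldMMO.getAlign());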
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LLVMContext & getContext() const
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ Src0.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
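These builder methods are the vocabulary an apply step uses to emit its replacement sequence. A minimal sketch (not a combine from this file) that clears the lowest set bit of a value, assuming a MachineIRBuilder B and a Register Src of type Ty are in scope:
  // Dst = Src & (Src - 1): clears the lowest set bit of Src.
  auto One = B.buildConstant(Ty, 1);
  auto Dec = B.buildSub(Ty, Src, One);
  Register Dst = B.buildAnd(Ty, Src, Dec).getReg(0);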
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:558
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:341
bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:391
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:561
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:386
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
void setRegClassOrRegBank(Register Reg, const RegClassOrRegBank &RCOrRB)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
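MachineRegisterInfo supplies the def/use queries a match step leans on. A small sketch under assumed names (MRI and Reg are placeholders, not values from this file):
  // Check that Reg is produced by a G_ADD whose result has exactly one
  // non-debug user, a common profitability guard before rewriting.
  MachineInstr *Def = MRI.getVRegDef(Reg);
  bool Profitable = Def && Def->getOpcode() == TargetOpcode::G_ADD &&
                    MRI.hasOneNonDBGUse(Reg);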
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const
Given the generic extension instruction ExtMI, returns true if this extension is a likely candidate f...
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
operand_type_match m_Reg()
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(int64_t RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
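These MIPatternMatch combinators compose into declarative matchers over virtual registers, and mi_match drives them. A hedged sketch (Root and MRI are assumed to be in scope, with `using namespace MIPatternMatch;` as in this file):
  // Match Root = (X << ShAmt) + Y, binding the pieces on success.
  Register X, Y;
  APInt ShAmt;
  if (mi_match(Root, MRI,
               m_GAdd(m_GShl(m_Reg(X), m_ICst(ShAmt)), m_Reg(Y)))) {
    // X, Y, and ShAmt now describe the matched operands.
  }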
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition: Utils.cpp:1426
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
static double log2(double V)
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:452
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:295
std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1386
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1539
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:727
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to have exactly one bit set when defined.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1509
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1521
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:479
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1554
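matchUnaryPredicate is the scalar-or-splat-aware way combines test constant operands that may be vectors. A sketch under assumed names (MRI and Reg are placeholders):
  // True if Reg is a constant, or a constant build vector, whose every
  // element is strictly positive.
  bool AllPositive = matchUnaryPredicate(MRI, Reg, [](const Constant *C) {
    const auto *CI = dyn_cast_or_null<ConstantInt>(C);
    return CI && CI->getValue().isStrictlyPositive();
  });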
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition: Utils.cpp:1586
std::function< void(MachineIRBuilder &)> BuildFnTy
std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:658
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1489
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition: Utils.cpp:201
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition: Utils.cpp:1419
std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:946
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition: Utils.cpp:440
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition: Utils.cpp:1611
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:426
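Many match steps only fire when an operand folds to a known integer, and this look-through helper is the usual entry point. A sketch assuming MI is a binary generic instruction such as G_AND (names are illustrative):
  // Operand 2 is the RHS of a binary generic op.
  if (auto RHSCst = getIConstantVRegValWithLookThrough(
          MI.getOperand(2).getReg(), MRI)) {
    const APInt &Mask = RHSCst->Value; // ValueAndVReg::Value
    (void)Mask;
  }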
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition: Utils.cpp:460
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:486
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1404
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition: Utils.h:224
Extended Value Type.
Definition: ValueTypes.h:34
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
Definition: KnownBits.cpp:494
static std::optional< bool > ne(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_NE result.
Definition: KnownBits.cpp:502
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:542
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:508
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:548
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:524
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:528
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:552
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:532
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:518
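These KnownBits predicates return a value only when the comparison is decided for every possible pair of concrete values, which is exactly what a compare-folding combine needs. A hedged sketch (LHSKnown and RHSKnown are assumed KnownBits for the two G_ICMP operands):
  // Fold an unsigned-less-than compare when the known bits already decide it.
  if (std::optional<bool> Res = KnownBits::ult(LHSKnown, RHSKnown)) {
    // *Res is the provable i1 result; a combine would build a G_CONSTANT from it.
  }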
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
MachineInstr * MI
const RegisterBank * Bank
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
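The udiv-by-constant combine is built on this magic-number data. A conservative sketch of querying it for a 32-bit divide by 10; the field names (Magic, IsAdd, PreShift, PostShift) are assumed to follow llvm/Support/DivisionByConstantInfo.h, and the full rewrite additionally needs the IsAdd fixup sequence emitted by the real lowering:
  // Without the IsAdd fixup the strength-reduced form is:
  //   q = umulh(n >> PreShift, Magic) >> PostShift
  APInt Divisor(32, 10);
  UnsignedDivisionByConstantInfo Magics =
      UnsignedDivisionByConstantInfo::get(Divisor);
  unsigned Pre = Magics.PreShift, Post = Magics.PostShift;
  bool NeedsFixup = Magics.IsAdd;
  (void)Pre; (void)Post; (void)NeedsFixup;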