1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
56CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
57 MachineIRBuilder &B, bool IsPreLegalize,
58 GISelKnownBits *KB, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI)
60 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
61 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
62 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
63 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
64 (void)this->KB;
65}
66
67const TargetLowering &CombinerHelper::getTargetLowering() const {
68 return *Builder.getMF().getSubtarget().getTargetLowering();
69}
70
71const MachineFunction &CombinerHelper::getMachineFunction() const {
72 return Builder.getMF();
73}
74
75const DataLayout &CombinerHelper::getDataLayout() const {
76 return getMachineFunction().getDataLayout();
77}
78
79LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
80
81/// \returns The little endian in-memory byte position of byte \p I in a
82/// \p ByteWidth bytes wide type.
83///
84/// E.g. Given a 4-byte type x, x[0] -> byte 0
85static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
86 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
87 return I;
88}
89
90/// Determines the LogBase2 value for a non-null input value using the
91/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
92static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
93 auto &MRI = *MIB.getMRI();
94 LLT Ty = MRI.getType(V);
95 auto Ctlz = MIB.buildCTLZ(Ty, V);
96 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
97 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
98}
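// Editorial example (not part of the original source): for a 32-bit value
// V = 8, ctlz(8) = 28, so LogBase2(8) = (32 - 1) - 28 = 3.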
99
100/// \returns The big endian in-memory byte position of byte \p I in a
101/// \p ByteWidth bytes wide type.
102///
103/// E.g. Given a 4-byte type x, x[0] -> byte 3
104static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
105 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
106 return ByteWidth - I - 1;
107}
108
109/// Given a map from byte offsets in memory to indices in a load/store,
110/// determine if that map corresponds to a little or big endian byte pattern.
111///
112/// \param MemOffset2Idx maps memory offsets to address offsets.
113/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
114///
115/// \returns true if the map corresponds to a big endian byte pattern, false if
116/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
117///
118/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
119/// are as follows:
120///
121/// AddrOffset Little endian Big endian
122/// 0 0 3
123/// 1 1 2
124/// 2 2 1
125/// 3 3 0
126static std::optional<bool>
128 int64_t LowestIdx) {
129 // Need at least two byte positions to decide on endianness.
130 unsigned Width = MemOffset2Idx.size();
131 if (Width < 2)
132 return std::nullopt;
133 bool BigEndian = true, LittleEndian = true;
134 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
135 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
136 if (MemOffsetAndIdx == MemOffset2Idx.end())
137 return std::nullopt;
138 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
139 assert(Idx >= 0 && "Expected non-negative byte offset?");
140 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
141 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
142 if (!BigEndian && !LittleEndian)
143 return std::nullopt;
144 }
145
146 assert((BigEndian != LittleEndian) &&
147 "Pattern cannot be both big and little endian!");
148 return BigEndian;
149}
150
151bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
152
153bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
154 assert(LI && "Must have LegalizerInfo to query isLegal!");
155 return LI->getAction(Query).Action == LegalizeActions::Legal;
156}
157
158bool CombinerHelper::isLegalOrBeforeLegalizer(
159 const LegalityQuery &Query) const {
160 return isPreLegalize() || isLegal(Query);
161}
162
163bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
164 if (!Ty.isVector())
165 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
166 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
167 if (isPreLegalize())
168 return true;
169 LLT EltTy = Ty.getElementType();
170 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
171 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
172}
173
174void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
175 Register ToReg) const {
176 Observer.changingAllUsesOfReg(MRI, FromReg);
177
178 if (MRI.constrainRegAttrs(ToReg, FromReg))
179 MRI.replaceRegWith(FromReg, ToReg);
180 else
181 Builder.buildCopy(ToReg, FromReg);
182
183 Observer.finishedChangingAllUsesOfReg();
184}
185
186void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
187 MachineOperand &FromRegOp,
188 Register ToReg) const {
189 assert(FromRegOp.getParent() && "Expected an operand in an MI");
190 Observer.changingInstr(*FromRegOp.getParent());
191
192 FromRegOp.setReg(ToReg);
193
194 Observer.changedInstr(*FromRegOp.getParent());
195}
196
197void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
198 unsigned ToOpcode) const {
199 Observer.changingInstr(FromMI);
200
201 FromMI.setDesc(Builder.getTII().get(ToOpcode));
202
203 Observer.changedInstr(FromMI);
204}
205
206const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
207 return RBI->getRegBank(Reg, MRI, *TRI);
208}
209
210void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
211 if (RegBank)
212 MRI.setRegBank(Reg, *RegBank);
213}
214
215bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
216 if (matchCombineCopy(MI)) {
217 applyCombineCopy(MI);
218 return true;
219 }
220 return false;
221}
222bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
223 if (MI.getOpcode() != TargetOpcode::COPY)
224 return false;
225 Register DstReg = MI.getOperand(0).getReg();
226 Register SrcReg = MI.getOperand(1).getReg();
227 return canReplaceReg(DstReg, SrcReg, MRI);
228}
229void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
230 Register DstReg = MI.getOperand(0).getReg();
231 Register SrcReg = MI.getOperand(1).getReg();
232 MI.eraseFromParent();
233 replaceRegWith(MRI, DstReg, SrcReg);
234}
235
236bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
237 MachineInstr &MI, BuildFnTy &MatchInfo) {
238 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
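// Illustrative MIR sketch of the rewrite (register names invented here):
//   %v:_(s32) = G_ADD %a, %b     ; %b is the only maybe-poison operand
//   %d:_(s32) = G_FREEZE %v
// becomes
//   %fb:_(s32) = G_FREEZE %b
//   %v:_(s32) = G_ADD %a, %fb    ; with poison-generating flags dropped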
239 Register DstOp = MI.getOperand(0).getReg();
240 Register OrigOp = MI.getOperand(1).getReg();
241
242 if (!MRI.hasOneNonDBGUse(OrigOp))
243 return false;
244
245 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
246 // Even if only a single operand of the PHI is not guaranteed non-poison,
247 // moving freeze() backwards across a PHI can cause optimization issues for
248 // other users of that operand.
249 //
250 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
251 // the source register is unprofitable because it makes the freeze() more
252 // strict than is necessary (it would affect the whole register instead of
253 // just the subreg being frozen).
254 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
255 return false;
256
257 if (canCreateUndefOrPoison(OrigOp, MRI,
258 /*ConsiderFlagsAndMetadata=*/false))
259 return false;
260
261 std::optional<MachineOperand> MaybePoisonOperand;
262 for (MachineOperand &Operand : OrigDef->uses()) {
263 if (!Operand.isReg())
264 return false;
265
266 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
267 continue;
268
269 if (!MaybePoisonOperand)
270 MaybePoisonOperand = Operand;
271 else {
272 // We have more than one maybe-poison operand. Moving the freeze is
273 // unsafe.
274 return false;
275 }
276 }
277
278 // Eliminate freeze if all operands are guaranteed non-poison.
279 if (!MaybePoisonOperand) {
280 MatchInfo = [=](MachineIRBuilder &B) {
281 Observer.changingInstr(*OrigDef);
282 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
283 Observer.changedInstr(*OrigDef);
284 B.buildCopy(DstOp, OrigOp);
285 };
286 return true;
287 }
288
289 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
290 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
291
292 MatchInfo = [=](MachineIRBuilder &B) mutable {
293 Observer.changingInstr(*OrigDef);
294 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
295 Observer.changedInstr(*OrigDef);
296 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
297 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
298 replaceRegOpWith(
299 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
300 Freeze.getReg(0));
301 replaceRegWith(MRI, DstOp, OrigOp);
302 };
303 return true;
304}
305
306bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
307 SmallVector<Register> &Ops) {
308 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
309 "Invalid instruction");
310 bool IsUndef = true;
311 MachineInstr *Undef = nullptr;
312
313 // Walk over all the operands of concat vectors and check if they are
314 // build_vector themselves or undef.
315 // Then collect their operands in Ops.
316 for (const MachineOperand &MO : MI.uses()) {
317 Register Reg = MO.getReg();
318 MachineInstr *Def = MRI.getVRegDef(Reg);
319 assert(Def && "Operand not defined");
320 if (!MRI.hasOneNonDBGUse(Reg))
321 return false;
322 switch (Def->getOpcode()) {
323 case TargetOpcode::G_BUILD_VECTOR:
324 IsUndef = false;
325 // Remember the operands of the build_vector to fold
326 // them into the yet-to-build flattened concat vectors.
327 for (const MachineOperand &BuildVecMO : Def->uses())
328 Ops.push_back(BuildVecMO.getReg());
329 break;
330 case TargetOpcode::G_IMPLICIT_DEF: {
331 LLT OpType = MRI.getType(Reg);
332 // Keep one undef value for all the undef operands.
333 if (!Undef) {
334 Builder.setInsertPt(*MI.getParent(), MI);
335 Undef = Builder.buildUndef(OpType.getScalarType());
336 }
337 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
338 OpType.getScalarType() &&
339 "All undefs should have the same type");
340 // Break the undef vector in as many scalar elements as needed
341 // for the flattening.
342 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
343 EltIdx != EltEnd; ++EltIdx)
344 Ops.push_back(Undef->getOperand(0).getReg());
345 break;
346 }
347 default:
348 return false;
349 }
350 }
351
352 // Check if the combine is illegal
353 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
354 if (!isLegalOrBeforeLegalizer(
355 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
356 return false;
357 }
358
359 if (IsUndef)
360 Ops.clear();
361
362 return true;
363}
364void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
365 SmallVector<Register> &Ops) {
366 // We determined that the concat_vectors can be flattened.
367 // Generate the flattened build_vector.
368 Register DstReg = MI.getOperand(0).getReg();
369 Builder.setInsertPt(*MI.getParent(), MI);
370 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
371
372 // Note: IsUndef is sort of redundant. We could have determined it by
373 // checking that all Ops are undef. Alternatively, we could have
374 // generated a build_vector of undefs and rely on another combine to
375 // clean that up. For now, given we already gather this information
376 // in matchCombineConcatVectors, just save compile time and issue the
377 // right thing.
378 if (Ops.empty())
379 Builder.buildUndef(NewDstReg);
380 else
381 Builder.buildBuildVector(NewDstReg, Ops);
382 MI.eraseFromParent();
383 replaceRegWith(MRI, DstReg, NewDstReg);
384}
385
386bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
387 SmallVector<Register> &Ops) {
388 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
389 auto ConcatMI1 =
390 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
391 auto ConcatMI2 =
392 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
393 if (!ConcatMI1 || !ConcatMI2)
394 return false;
395
396 // Check that the sources of the Concat instructions have the same type
397 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
398 MRI.getType(ConcatMI2->getSourceReg(0)))
399 return false;
400
401 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
402 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
403 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
404 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
405 // Check if the index takes a whole source register from G_CONCAT_VECTORS
406 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
407 if (Mask[i] == -1) {
408 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
409 if (i + j >= Mask.size())
410 return false;
411 if (Mask[i + j] != -1)
412 return false;
413 }
414 if (!isLegalOrBeforeLegalizer(
415 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
416 return false;
417 Ops.push_back(0);
418 } else if (Mask[i] % ConcatSrcNumElt == 0) {
419 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
420 if (i + j >= Mask.size())
421 return false;
422 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
423 return false;
424 }
425 // Retrieve the source register from its respective G_CONCAT_VECTORS
426 // instruction
427 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
428 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
429 } else {
430 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
431 ConcatMI1->getNumSources()));
432 }
433 } else {
434 return false;
435 }
436 }
437
438 if (!isLegalOrBeforeLegalizer(
439 {TargetOpcode::G_CONCAT_VECTORS,
440 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
441 return false;
442
443 return !Ops.empty();
444}
445
446void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI,
447 SmallVector<Register> &Ops) {
448 LLT SrcTy = MRI.getType(Ops[0]);
449 Register UndefReg = 0;
450
451 for (Register &Reg : Ops) {
452 if (Reg == 0) {
453 if (UndefReg == 0)
454 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
455 Reg = UndefReg;
456 }
457 }
458
459 if (Ops.size() > 1)
460 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
461 else
462 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
463 MI.eraseFromParent();
464}
465
466bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
467 SmallVector<Register, 4> Ops;
468 if (matchCombineShuffleVector(MI, Ops)) {
469 applyCombineShuffleVector(MI, Ops);
470 return true;
471 }
472 return false;
473}
474
475bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
476 SmallVectorImpl<Register> &Ops) {
477 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
478 "Invalid instruction kind");
479 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
480 Register Src1 = MI.getOperand(1).getReg();
481 LLT SrcType = MRI.getType(Src1);
482 // As bizarre as it may look, shuffle vector can actually produce
483 // scalar! This is because at the IR level a <1 x ty> shuffle
484 // vector is perfectly valid.
485 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
486 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
487
488 // If the resulting vector is smaller than the size of the source
489 // vectors being concatenated, we won't be able to replace the
490 // shuffle vector into a concat_vectors.
491 //
492 // Note: We may still be able to produce a concat_vectors fed by
493 // extract_vector_elt and so on. It is less clear that would
494 // be better though, so don't bother for now.
495 //
496 // If the destination is a scalar, the size of the sources doesn't
497 // matter. We will lower the shuffle to a plain copy. This will
498 // work only if the source and destination have the same size. But
499 // that's covered by the next condition.
500 //
501 // TODO: If the size between the source and destination don't match
502 // we could still emit an extract vector element in that case.
503 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
504 return false;
505
506 // Check that the shuffle mask can be broken evenly between the
507 // different sources.
508 if (DstNumElts % SrcNumElts != 0)
509 return false;
510
511 // Mask length is a multiple of the source vector length.
512 // Check if the shuffle is some kind of concatenation of the input
513 // vectors.
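// Illustrative example (not from the original source): with two <4 x s32>
// sources, a mask of [0,1,2,3,4,5,6,7] takes src1 then src2 wholesale, so the
// shuffle is just the concatenation of src1 and src2.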
514 unsigned NumConcat = DstNumElts / SrcNumElts;
515 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
516 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
517 for (unsigned i = 0; i != DstNumElts; ++i) {
518 int Idx = Mask[i];
519 // Undef value.
520 if (Idx < 0)
521 continue;
522 // Ensure the indices in each SrcType sized piece are sequential and that
523 // the same source is used for the whole piece.
524 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
525 (ConcatSrcs[i / SrcNumElts] >= 0 &&
526 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
527 return false;
528 // Remember which source this index came from.
529 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
530 }
531
532 // The shuffle is concatenating multiple vectors together.
533 // Collect the different operands for that.
534 Register UndefReg;
535 Register Src2 = MI.getOperand(2).getReg();
536 for (auto Src : ConcatSrcs) {
537 if (Src < 0) {
538 if (!UndefReg) {
539 Builder.setInsertPt(*MI.getParent(), MI);
540 UndefReg = Builder.buildUndef(SrcType).getReg(0);
541 }
542 Ops.push_back(UndefReg);
543 } else if (Src == 0)
544 Ops.push_back(Src1);
545 else
546 Ops.push_back(Src2);
547 }
548 return true;
549}
550
551void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
552 const ArrayRef<Register> Ops) {
553 Register DstReg = MI.getOperand(0).getReg();
554 Builder.setInsertPt(*MI.getParent(), MI);
555 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
556
557 if (Ops.size() == 1)
558 Builder.buildCopy(NewDstReg, Ops[0]);
559 else
560 Builder.buildMergeLikeInstr(NewDstReg, Ops);
561
562 MI.eraseFromParent();
563 replaceRegWith(MRI, DstReg, NewDstReg);
564}
565
566bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
567 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
568 "Invalid instruction kind");
569
570 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
571 return Mask.size() == 1;
572}
573
574void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
575 Register DstReg = MI.getOperand(0).getReg();
576 Builder.setInsertPt(*MI.getParent(), MI);
577
578 int I = MI.getOperand(3).getShuffleMask()[0];
579 Register Src1 = MI.getOperand(1).getReg();
580 LLT Src1Ty = MRI.getType(Src1);
581 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
582 Register SrcReg;
583 if (I >= Src1NumElts) {
584 SrcReg = MI.getOperand(2).getReg();
585 I -= Src1NumElts;
586 } else if (I >= 0)
587 SrcReg = Src1;
588
589 if (I < 0)
590 Builder.buildUndef(DstReg);
591 else if (!MRI.getType(SrcReg).isVector())
592 Builder.buildCopy(DstReg, SrcReg);
593 else
594 Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);
595
596 MI.eraseFromParent();
597}
598
599namespace {
600
601/// Select a preference between two uses. CurrentUse is the current preference
602/// while *ForCandidate is attributes of the candidate under consideration.
603PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
604 PreferredTuple &CurrentUse,
605 const LLT TyForCandidate,
606 unsigned OpcodeForCandidate,
607 MachineInstr *MIForCandidate) {
608 if (!CurrentUse.Ty.isValid()) {
609 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
610 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
611 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
612 return CurrentUse;
613 }
614
615 // We permit the extend to hoist through basic blocks but this is only
616 // sensible if the target has extending loads. If you end up lowering back
617 // into a load and extend during the legalizer then the end result is
618 // hoisting the extend up to the load.
619
620 // Prefer defined extensions to undefined extensions as these are more
621 // likely to reduce the number of instructions.
622 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
623 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
624 return CurrentUse;
625 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
626 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
627 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
628
629 // Prefer sign extensions to zero extensions as sign-extensions tend to be
630 // more expensive. Don't do this if the load is already a zero-extend load
631 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
632 // later.
633 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
634 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
635 OpcodeForCandidate == TargetOpcode::G_ZEXT)
636 return CurrentUse;
637 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
638 OpcodeForCandidate == TargetOpcode::G_SEXT)
639 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
640 }
641
642 // This is potentially target specific. We've chosen the largest type
643 // because G_TRUNC is usually free. One potential catch with this is that
644 // some targets have a reduced number of larger registers than smaller
645 // registers and this choice potentially increases the live-range for the
646 // larger value.
647 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
648 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
649 }
650 return CurrentUse;
651}
652
653/// Find a suitable place to insert some instructions and insert them. This
654/// function accounts for special cases like inserting before a PHI node.
655/// The current strategy for inserting before PHI's is to duplicate the
656/// instructions for each predecessor. However, while that's ok for G_TRUNC
657/// on most targets since it generally requires no code, other targets/cases may
658/// want to try harder to find a dominating block.
659static void InsertInsnsWithoutSideEffectsBeforeUse(
660 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
661 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
662 MachineOperand &UseMO)>
663 Inserter) {
664 MachineInstr &UseMI = *UseMO.getParent();
665
666 MachineBasicBlock *InsertBB = UseMI.getParent();
667
668 // If the use is a PHI then we want the predecessor block instead.
669 if (UseMI.isPHI()) {
670 MachineOperand *PredBB = std::next(&UseMO);
671 InsertBB = PredBB->getMBB();
672 }
673
674 // If the block is the same block as the def then we want to insert just after
675 // the def instead of at the start of the block.
676 if (InsertBB == DefMI.getParent()) {
677 MachineBasicBlock::iterator InsertPt = &DefMI;
678 Inserter(InsertBB, std::next(InsertPt), UseMO);
679 return;
680 }
681
682 // Otherwise we want the start of the BB
683 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
684}
685} // end anonymous namespace
686
687bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
688 PreferredTuple Preferred;
689 if (matchCombineExtendingLoads(MI, Preferred)) {
690 applyCombineExtendingLoads(MI, Preferred);
691 return true;
692 }
693 return false;
694}
695
696static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
697 unsigned CandidateLoadOpc;
698 switch (ExtOpc) {
699 case TargetOpcode::G_ANYEXT:
700 CandidateLoadOpc = TargetOpcode::G_LOAD;
701 break;
702 case TargetOpcode::G_SEXT:
703 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
704 break;
705 case TargetOpcode::G_ZEXT:
706 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
707 break;
708 default:
709 llvm_unreachable("Unexpected extend opc");
710 }
711 return CandidateLoadOpc;
712}
713
714bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
715 PreferredTuple &Preferred) {
716 // We match the loads and follow the uses to the extend instead of matching
717 // the extends and following the def to the load. This is because the load
718 // must remain in the same position for correctness (unless we also add code
719 // to find a safe place to sink it) whereas the extend is freely movable.
720 // It also prevents us from duplicating the load for the volatile case or just
721 // for performance.
722 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
723 if (!LoadMI)
724 return false;
725
726 Register LoadReg = LoadMI->getDstReg();
727
728 LLT LoadValueTy = MRI.getType(LoadReg);
729 if (!LoadValueTy.isScalar())
730 return false;
731
732 // Most architectures are going to legalize <s8 loads into at least a 1 byte
733 // load, and the MMOs can only describe memory accesses in multiples of bytes.
734 // If we try to perform extload combining on those, we can end up with
735 // %a(s8) = extload %ptr (load 1 byte from %ptr)
736 // ... which is an illegal extload instruction.
737 if (LoadValueTy.getSizeInBits() < 8)
738 return false;
739
740 // For non power-of-2 types, they will very likely be legalized into multiple
741 // loads. Don't bother trying to match them into extending loads.
742 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
743 return false;
744
745 // Find the preferred type aside from the any-extends (unless it's the only
746 // one) and non-extending ops. We'll emit an extending load to that type
747 // and emit a variant of (extend (trunc X)) for the others according to the
748 // relative type sizes. At the same time, pick an extend to use based on the
749 // extend involved in the chosen type.
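// For instance (editorial example): if an s8 load has one G_SEXT-to-s32 user
// and one G_ANYEXT-to-s64 user, the defined extension wins, the load becomes a
// G_SEXTLOAD producing s32, and the s64 user is rewritten as an extend of that
// result.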
750 unsigned PreferredOpcode =
751 isa<GLoad>(&MI)
752 ? TargetOpcode::G_ANYEXT
753 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
754 Preferred = {LLT(), PreferredOpcode, nullptr};
755 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
756 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
757 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
758 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
759 const auto &MMO = LoadMI->getMMO();
760 // Don't do anything for atomics.
761 if (MMO.isAtomic())
762 continue;
763 // Check for legality.
764 if (!isPreLegalize()) {
765 LegalityQuery::MemDesc MMDesc(MMO);
766 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
767 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
768 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
769 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
770 .Action != LegalizeActions::Legal)
771 continue;
772 }
773 Preferred = ChoosePreferredUse(MI, Preferred,
774 MRI.getType(UseMI.getOperand(0).getReg()),
775 UseMI.getOpcode(), &UseMI);
776 }
777 }
778
779 // There were no extends
780 if (!Preferred.MI)
781 return false;
782 // It should be impossible to choose an extend without selecting a different
783 // type since by definition the result of an extend is larger.
784 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
785
786 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
787 return true;
788}
789
790void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
791 PreferredTuple &Preferred) {
792 // Rewrite the load to the chosen extending load.
793 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
794
795 // Inserter to insert a truncate back to the original type at a given point
796 // with some basic CSE to limit truncate duplication to one per BB.
797 DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
798 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
799 MachineBasicBlock::iterator InsertBefore,
800 MachineOperand &UseMO) {
801 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
802 if (PreviouslyEmitted) {
803 Observer.changingInstr(*UseMO.getParent());
804 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
805 Observer.changedInstr(*UseMO.getParent());
806 return;
807 }
808
809 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
810 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
811 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
812 EmittedInsns[InsertIntoBB] = NewMI;
813 replaceRegOpWith(MRI, UseMO, NewDstReg);
814 };
815
816 Observer.changingInstr(MI);
817 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
818 MI.setDesc(Builder.getTII().get(LoadOpc));
819
820 // Rewrite all the uses to fix up the types.
821 auto &LoadValue = MI.getOperand(0);
822 SmallVector<MachineOperand *, 4> Uses;
823 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
824 Uses.push_back(&UseMO);
825
826 for (auto *UseMO : Uses) {
827 MachineInstr *UseMI = UseMO->getParent();
828
829 // If the extend is compatible with the preferred extend then we should fix
830 // up the type and extend so that it uses the preferred use.
831 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
832 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
833 Register UseDstReg = UseMI->getOperand(0).getReg();
834 MachineOperand &UseSrcMO = UseMI->getOperand(1);
835 const LLT UseDstTy = MRI.getType(UseDstReg);
836 if (UseDstReg != ChosenDstReg) {
837 if (Preferred.Ty == UseDstTy) {
838 // If the use has the same type as the preferred use, then merge
839 // the vregs and erase the extend. For example:
840 // %1:_(s8) = G_LOAD ...
841 // %2:_(s32) = G_SEXT %1(s8)
842 // %3:_(s32) = G_ANYEXT %1(s8)
843 // ... = ... %3(s32)
844 // rewrites to:
845 // %2:_(s32) = G_SEXTLOAD ...
846 // ... = ... %2(s32)
847 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
848 Observer.erasingInstr(*UseMO->getParent());
849 UseMO->getParent()->eraseFromParent();
850 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
851 // If the preferred size is smaller, then keep the extend but extend
852 // from the result of the extending load. For example:
853 // %1:_(s8) = G_LOAD ...
854 // %2:_(s32) = G_SEXT %1(s8)
855 // %3:_(s64) = G_ANYEXT %1(s8)
856 // ... = ... %3(s64)
857 /// rewrites to:
858 // %2:_(s32) = G_SEXTLOAD ...
859 // %3:_(s64) = G_ANYEXT %2:_(s32)
860 // ... = ... %3(s64)
861 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
862 } else {
863 // If the preferred size is large, then insert a truncate. For
864 // example:
865 // %1:_(s8) = G_LOAD ...
866 // %2:_(s64) = G_SEXT %1(s8)
867 // %3:_(s32) = G_ZEXT %1(s8)
868 // ... = ... %3(s32)
869 /// rewrites to:
870 // %2:_(s64) = G_SEXTLOAD ...
871 // %4:_(s8) = G_TRUNC %2:_(s32)
872 // %3:_(s64) = G_ZEXT %2:_(s8)
873 // ... = ... %3(s64)
874 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
875 InsertTruncAt);
876 }
877 continue;
878 }
879 // The use is (one of) the uses of the preferred use we chose earlier.
880 // We're going to update the load to def this value later so just erase
881 // the old extend.
882 Observer.erasingInstr(*UseMO->getParent());
883 UseMO->getParent()->eraseFromParent();
884 continue;
885 }
886
887 // The use isn't an extend. Truncate back to the type we originally loaded.
888 // This is free on many targets.
889 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
890 }
891
892 MI.getOperand(0).setReg(ChosenDstReg);
893 Observer.changedInstr(MI);
894}
895
896bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
897 BuildFnTy &MatchInfo) {
898 assert(MI.getOpcode() == TargetOpcode::G_AND);
899
900 // If we have the following code:
901 // %mask = G_CONSTANT 255
902 // %ld = G_LOAD %ptr, (load s16)
903 // %and = G_AND %ld, %mask
904 //
905 // Try to fold it into
906 // %ld = G_ZEXTLOAD %ptr, (load s8)
907
908 Register Dst = MI.getOperand(0).getReg();
909 if (MRI.getType(Dst).isVector())
910 return false;
911
912 auto MaybeMask =
913 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
914 if (!MaybeMask)
915 return false;
916
917 APInt MaskVal = MaybeMask->Value;
918
919 if (!MaskVal.isMask())
920 return false;
921
922 Register SrcReg = MI.getOperand(1).getReg();
923 // Don't use getOpcodeDef() here since intermediate instructions may have
924 // multiple users.
925 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
926 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
927 return false;
928
929 Register LoadReg = LoadMI->getDstReg();
930 LLT RegTy = MRI.getType(LoadReg);
931 Register PtrReg = LoadMI->getPointerReg();
932 unsigned RegSize = RegTy.getSizeInBits();
933 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
934 unsigned MaskSizeBits = MaskVal.countr_one();
935
936 // The mask may not be larger than the in-memory type, as it might cover sign
937 // extended bits
938 if (MaskSizeBits > LoadSizeBits.getValue())
939 return false;
940
941 // If the mask covers the whole destination register, there's nothing to
942 // extend
943 if (MaskSizeBits >= RegSize)
944 return false;
945
946 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
947 // at least byte loads. Avoid creating such loads here
948 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
949 return false;
950
951 const MachineMemOperand &MMO = LoadMI->getMMO();
952 LegalityQuery::MemDesc MemDesc(MMO);
953
954 // Don't modify the memory access size if this is atomic/volatile, but we can
955 // still adjust the opcode to indicate the high bit behavior.
956 if (LoadMI->isSimple())
957 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
958 else if (LoadSizeBits.getValue() > MaskSizeBits ||
959 LoadSizeBits.getValue() == RegSize)
960 return false;
961
962 // TODO: Could check if it's legal with the reduced or original memory size.
963 if (!isLegalOrBeforeLegalizer(
964 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
965 return false;
966
967 MatchInfo = [=](MachineIRBuilder &B) {
968 B.setInstrAndDebugLoc(*LoadMI);
969 auto &MF = B.getMF();
970 auto PtrInfo = MMO.getPointerInfo();
971 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
972 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
973 LoadMI->eraseFromParent();
974 };
975 return true;
976}
977
978bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
979 const MachineInstr &UseMI) {
980 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
981 "shouldn't consider debug uses");
982 assert(DefMI.getParent() == UseMI.getParent());
983 if (&DefMI == &UseMI)
984 return true;
985 const MachineBasicBlock &MBB = *DefMI.getParent();
986 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
987 return &MI == &DefMI || &MI == &UseMI;
988 });
989 if (DefOrUse == MBB.end())
990 llvm_unreachable("Block must contain both DefMI and UseMI!");
991 return &*DefOrUse == &DefMI;
992}
993
994bool CombinerHelper::dominates(const MachineInstr &DefMI,
995 const MachineInstr &UseMI) {
996 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
997 "shouldn't consider debug uses");
998 if (MDT)
999 return MDT->dominates(&DefMI, &UseMI);
1000 else if (DefMI.getParent() != UseMI.getParent())
1001 return false;
1002
1003 return isPredecessor(DefMI, UseMI);
1004}
1005
1006bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
1007 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1008 Register SrcReg = MI.getOperand(1).getReg();
1009 Register LoadUser = SrcReg;
1010
1011 if (MRI.getType(SrcReg).isVector())
1012 return false;
1013
1014 Register TruncSrc;
1015 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1016 LoadUser = TruncSrc;
1017
1018 uint64_t SizeInBits = MI.getOperand(2).getImm();
1019 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1020 // need any extend at all, just a truncate.
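// Editorial example:
//   %ld:_(s64) = G_SEXTLOAD %ptr (load 1)   ; sign-extends 8 bits from memory
//   %ext:_(s64) = G_SEXT_INREG %ld, 8
// Here the G_SEXT_INREG is redundant and can be replaced by a copy of %ld.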
1021 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1022 // If truncating more than the original extended value, abort.
1023 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1024 if (TruncSrc &&
1025 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1026 return false;
1027 if (LoadSizeBits == SizeInBits)
1028 return true;
1029 }
1030 return false;
1031}
1032
1033void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
1034 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1035 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1036 MI.eraseFromParent();
1037}
1038
1039bool CombinerHelper::matchSextInRegOfLoad(
1040 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1041 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1042
1043 Register DstReg = MI.getOperand(0).getReg();
1044 LLT RegTy = MRI.getType(DstReg);
1045
1046 // Only supports scalars for now.
1047 if (RegTy.isVector())
1048 return false;
1049
1050 Register SrcReg = MI.getOperand(1).getReg();
1051 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1052 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
1053 return false;
1054
1055 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1056
1057 // If the sign extend extends from a narrower width than the load's width,
1058 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1059 // Avoid widening the load at all.
1060 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1061
1062 // Don't generate G_SEXTLOADs with a < 1 byte width.
1063 if (NewSizeBits < 8)
1064 return false;
1065 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1066 // anyway for most targets.
1067 if (!isPowerOf2_32(NewSizeBits))
1068 return false;
1069
1070 const MachineMemOperand &MMO = LoadDef->getMMO();
1071 LegalityQuery::MemDesc MMDesc(MMO);
1072
1073 // Don't modify the memory access size if this is atomic/volatile, but we can
1074 // still adjust the opcode to indicate the high bit behavior.
1075 if (LoadDef->isSimple())
1076 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1077 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1078 return false;
1079
1080 // TODO: Could check if it's legal with the reduced or original memory size.
1081 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1082 {MRI.getType(LoadDef->getDstReg()),
1083 MRI.getType(LoadDef->getPointerReg())},
1084 {MMDesc}}))
1085 return false;
1086
1087 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1088 return true;
1089}
1090
1091void CombinerHelper::applySextInRegOfLoad(
1092 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1093 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1094 Register LoadReg;
1095 unsigned ScalarSizeBits;
1096 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1097 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1098
1099 // If we have the following:
1100 // %ld = G_LOAD %ptr, (load 2)
1101 // %ext = G_SEXT_INREG %ld, 8
1102 // ==>
1103 // %ld = G_SEXTLOAD %ptr (load 1)
1104
1105 auto &MMO = LoadDef->getMMO();
1106 Builder.setInstrAndDebugLoc(*LoadDef);
1107 auto &MF = Builder.getMF();
1108 auto PtrInfo = MMO.getPointerInfo();
1109 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1110 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1111 LoadDef->getPointerReg(), *NewMMO);
1112 MI.eraseFromParent();
1113}
1114
1115/// Return true if 'MI' is a load or a store that may have its address
1116/// operand folded into the load / store addressing mode.
1117static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
1118 MachineRegisterInfo &MRI) {
1119 TargetLowering::AddrMode AM;
1120 auto *MF = MI->getMF();
1121 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1122 if (!Addr)
1123 return false;
1124
1125 AM.HasBaseReg = true;
1126 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1127 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1128 else
1129 AM.Scale = 1; // [reg +/- reg]
1130
1131 return TLI.isLegalAddressingMode(
1132 MF->getDataLayout(), AM,
1133 getTypeForLLT(MI->getMMO().getMemoryType(),
1134 MF->getFunction().getContext()),
1135 MI->getMMO().getAddrSpace());
1136}
1137
1138static unsigned getIndexedOpc(unsigned LdStOpc) {
1139 switch (LdStOpc) {
1140 case TargetOpcode::G_LOAD:
1141 return TargetOpcode::G_INDEXED_LOAD;
1142 case TargetOpcode::G_STORE:
1143 return TargetOpcode::G_INDEXED_STORE;
1144 case TargetOpcode::G_ZEXTLOAD:
1145 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1146 case TargetOpcode::G_SEXTLOAD:
1147 return TargetOpcode::G_INDEXED_SEXTLOAD;
1148 default:
1149 llvm_unreachable("Unexpected opcode");
1150 }
1151}
1152
1153bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1154 // Check for legality.
1155 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1156 LLT Ty = MRI.getType(LdSt.getReg(0));
1157 LLT MemTy = LdSt.getMMO().getMemoryType();
1158 SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
1159 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1160 AtomicOrdering::NotAtomic}});
1161 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1162 SmallVector<LLT> OpTys;
1163 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1164 OpTys = {PtrTy, Ty, Ty};
1165 else
1166 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1167
1168 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1169 return isLegal(Q);
1170}
1171
1173 "post-index-use-threshold", cl::Hidden, cl::init(32),
1174 cl::desc("Number of uses of a base pointer to check before it is no longer "
1175 "considered for post-indexing."));
1176
1177bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1178 Register &Base, Register &Offset,
1179 bool &RematOffset) {
1180 // We're looking for the following pattern, for either load or store:
1181 // %baseptr:_(p0) = ...
1182 // G_STORE %val(s64), %baseptr(p0)
1183 // %offset:_(s64) = G_CONSTANT i64 -256
1184 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1185 const auto &TLI = getTargetLowering();
1186
1187 Register Ptr = LdSt.getPointerReg();
1188 // If the store is the only use, don't bother.
1189 if (MRI.hasOneNonDBGUse(Ptr))
1190 return false;
1191
1192 if (!isIndexedLoadStoreLegal(LdSt))
1193 return false;
1194
1195 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1196 return false;
1197
1198 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1199 auto *PtrDef = MRI.getVRegDef(Ptr);
1200
1201 unsigned NumUsesChecked = 0;
1202 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1203 if (++NumUsesChecked > PostIndexUseThreshold)
1204 return false; // Try to avoid exploding compile time.
1205
1206 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1207 // The use itself might be dead. This can happen during combines if DCE
1208 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1209 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1210 continue;
1211
1212 // Check the user of this isn't the store, otherwise we'd be generating an
1213 // indexed store defining its own use.
1214 if (StoredValDef == &Use)
1215 continue;
1216
1217 Offset = PtrAdd->getOffsetReg();
1218 if (!ForceLegalIndexing &&
1219 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1220 /*IsPre*/ false, MRI))
1221 continue;
1222
1223 // Make sure the offset calculation is before the potentially indexed op.
1224 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1225 RematOffset = false;
1226 if (!dominates(*OffsetDef, LdSt)) {
1227 // If the offset however is just a G_CONSTANT, we can always just
1228 // rematerialize it where we need it.
1229 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1230 continue;
1231 RematOffset = true;
1232 }
1233
1234 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1235 if (&BasePtrUse == PtrDef)
1236 continue;
1237
1238 // If the user is a later load/store that can be post-indexed, then don't
1239 // combine this one.
1240 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1241 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1242 dominates(LdSt, *BasePtrLdSt) &&
1243 isIndexedLoadStoreLegal(*BasePtrLdSt))
1244 return false;
1245
1246 // Now we're looking for the key G_PTR_ADD instruction, which contains
1247 // the offset add that we want to fold.
1248 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1249 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1250 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1251 // If the use is in a different block, then we may produce worse code
1252 // due to the extra register pressure.
1253 if (BaseUseUse.getParent() != LdSt.getParent())
1254 return false;
1255
1256 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1257 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1258 return false;
1259 }
1260 if (!dominates(LdSt, BasePtrUse))
1261 return false; // All uses must be dominated by the load/store.
1262 }
1263 }
1264
1265 Addr = PtrAdd->getReg(0);
1266 Base = PtrAdd->getBaseReg();
1267 return true;
1268 }
1269
1270 return false;
1271}
1272
1273bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1274 Register &Base, Register &Offset) {
1275 auto &MF = *LdSt.getParent()->getParent();
1276 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1277
1278 Addr = LdSt.getPointerReg();
1279 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1280 MRI.hasOneNonDBGUse(Addr))
1281 return false;
1282
1283 if (!ForceLegalIndexing &&
1284 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1285 return false;
1286
1287 if (!isIndexedLoadStoreLegal(LdSt))
1288 return false;
1289
1290 MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
1291 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1292 return false;
1293
1294 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1295 // Would require a copy.
1296 if (Base == St->getValueReg())
1297 return false;
1298
1299 // We're expecting one use of Addr in MI, but it could also be the
1300 // value stored, which isn't actually dominated by the instruction.
1301 if (St->getValueReg() == Addr)
1302 return false;
1303 }
1304
1305 // Avoid increasing cross-block register pressure.
1306 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1307 if (AddrUse.getParent() != LdSt.getParent())
1308 return false;
1309
1310 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1311 // That might allow us to end base's liveness here by adjusting the constant.
1312 bool RealUse = false;
1313 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1314 if (!dominates(LdSt, AddrUse))
1315 return false; // All uses must be dominated by the load/store.
1316
1317 // If Ptr may be folded in addressing mode of other use, then it's
1318 // not profitable to do this transformation.
1319 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1320 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1321 RealUse = true;
1322 } else {
1323 RealUse = true;
1324 }
1325 }
1326 return RealUse;
1327}
1328
1329bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
1330 BuildFnTy &MatchInfo) {
1331 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1332
1333 // Check if there is a load that defines the vector being extracted from.
1334 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1335 if (!LoadMI)
1336 return false;
1337
1338 Register Vector = MI.getOperand(1).getReg();
1339 LLT VecEltTy = MRI.getType(Vector).getElementType();
1340
1341 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1342
1343 // Checking whether we should reduce the load width.
1344 if (!MRI.hasOneNonDBGUse(Vector))
1345 return false;
1346
1347 // Check if the defining load is simple.
1348 if (!LoadMI->isSimple())
1349 return false;
1350
1351 // If the vector element type is not a multiple of a byte then we are unable
1352 // to correctly compute an address to load only the extracted element as a
1353 // scalar.
1354 if (!VecEltTy.isByteSized())
1355 return false;
1356
1357 // Check for load fold barriers between the extraction and the load.
1358 if (MI.getParent() != LoadMI->getParent())
1359 return false;
1360 const unsigned MaxIter = 20;
1361 unsigned Iter = 0;
1362 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1363 if (II->isLoadFoldBarrier())
1364 return false;
1365 if (Iter++ == MaxIter)
1366 return false;
1367 }
1368
1369 // Check if the new load that we are going to create is legal
1370 // if we are in the post-legalization phase.
1371 MachineMemOperand MMO = LoadMI->getMMO();
1372 Align Alignment = MMO.getAlign();
1373 MachinePointerInfo PtrInfo;
1374 uint64_t Offset;
1375
1376 // Finding the appropriate PtrInfo if offset is a known constant.
1377 // This is required to create the memory operand for the narrowed load.
1378 // This machine memory operand object helps us infer about legality
1379 // before we proceed to combine the instruction.
1380 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1381 int Elt = CVal->getZExtValue();
1382 // FIXME: should be (ABI size)*Elt.
1383 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1384 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1385 } else {
1386 // Discard the pointer info except the address space because the memory
1387 // operand can't represent this new access since the offset is variable.
1388 Offset = VecEltTy.getSizeInBits() / 8;
1389 PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
1390 }
1391
1392 Alignment = commonAlignment(Alignment, Offset);
1393
1394 Register VecPtr = LoadMI->getPointerReg();
1395 LLT PtrTy = MRI.getType(VecPtr);
1396
1397 MachineFunction &MF = *MI.getMF();
1398 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1399
1400 LegalityQuery::MemDesc MMDesc(*NewMMO);
1401
1402 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1403
1403
1404 if (!isLegalOrBeforeLegalizer(Q))
1405 return false;
1406
1407 // Load must be allowed and fast on the target.
1408 LLVMContext &C = MF.getFunction().getContext();
1409 auto &DL = MF.getDataLayout();
1410 unsigned Fast = 0;
1411 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1412 &Fast) ||
1413 !Fast)
1414 return false;
1415
1416 Register Result = MI.getOperand(0).getReg();
1417 Register Index = MI.getOperand(2).getReg();
1418
1419 MatchInfo = [=](MachineIRBuilder &B) {
1420 GISelObserverWrapper DummyObserver;
1421 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1422 //// Get pointer to the vector element.
1423 Register finalPtr = Helper.getVectorElementPointer(
1424 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1425 Index);
1426 // New G_LOAD instruction.
1427 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1428 // Remove original GLOAD instruction.
1429 LoadMI->eraseFromParent();
1430 };
1431
1432 return true;
1433}
1434
1435bool CombinerHelper::matchCombineIndexedLoadStore(
1436 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1437 auto &LdSt = cast<GLoadStore>(MI);
1438
1439 if (LdSt.isAtomic())
1440 return false;
1441
1442 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1443 MatchInfo.Offset);
1444 if (!MatchInfo.IsPre &&
1445 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1446 MatchInfo.Offset, MatchInfo.RematOffset))
1447 return false;
1448
1449 return true;
1450}
1451
1452void CombinerHelper::applyCombineIndexedLoadStore(
1453 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1454 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1455 unsigned Opcode = MI.getOpcode();
1456 bool IsStore = Opcode == TargetOpcode::G_STORE;
1457 unsigned NewOpcode = getIndexedOpc(Opcode);
1458
1459 // If the offset constant didn't happen to dominate the load/store, we can
1460 // just clone it as needed.
1461 if (MatchInfo.RematOffset) {
1462 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1463 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1464 *OldCst->getOperand(1).getCImm());
1465 MatchInfo.Offset = NewCst.getReg(0);
1466 }
1467
1468 auto MIB = Builder.buildInstr(NewOpcode);
1469 if (IsStore) {
1470 MIB.addDef(MatchInfo.Addr);
1471 MIB.addUse(MI.getOperand(0).getReg());
1472 } else {
1473 MIB.addDef(MI.getOperand(0).getReg());
1474 MIB.addDef(MatchInfo.Addr);
1475 }
1476
1477 MIB.addUse(MatchInfo.Base);
1478 MIB.addUse(MatchInfo.Offset);
1479 MIB.addImm(MatchInfo.IsPre);
1480 MIB->cloneMemRefs(*MI.getMF(), MI);
1481 MI.eraseFromParent();
1482 AddrDef.eraseFromParent();
1483
1484 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1485}
1486
1487bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
1488 MachineInstr *&OtherMI) {
1489 unsigned Opcode = MI.getOpcode();
1490 bool IsDiv, IsSigned;
1491
1492 switch (Opcode) {
1493 default:
1494 llvm_unreachable("Unexpected opcode!");
1495 case TargetOpcode::G_SDIV:
1496 case TargetOpcode::G_UDIV: {
1497 IsDiv = true;
1498 IsSigned = Opcode == TargetOpcode::G_SDIV;
1499 break;
1500 }
1501 case TargetOpcode::G_SREM:
1502 case TargetOpcode::G_UREM: {
1503 IsDiv = false;
1504 IsSigned = Opcode == TargetOpcode::G_SREM;
1505 break;
1506 }
1507 }
1508
1509 Register Src1 = MI.getOperand(1).getReg();
1510 unsigned DivOpcode, RemOpcode, DivremOpcode;
1511 if (IsSigned) {
1512 DivOpcode = TargetOpcode::G_SDIV;
1513 RemOpcode = TargetOpcode::G_SREM;
1514 DivremOpcode = TargetOpcode::G_SDIVREM;
1515 } else {
1516 DivOpcode = TargetOpcode::G_UDIV;
1517 RemOpcode = TargetOpcode::G_UREM;
1518 DivremOpcode = TargetOpcode::G_UDIVREM;
1519 }
1520
1521 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1522 return false;
1523
1524 // Combine:
1525 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1526 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1527 // into:
1528 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1529
1530 // Combine:
1531 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1532 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1533 // into:
1534 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1535
1536 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1537 if (MI.getParent() == UseMI.getParent() &&
1538 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1539 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1540 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1541 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1542 OtherMI = &UseMI;
1543 return true;
1544 }
1545 }
1546
1547 return false;
1548}
1549
1550void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1551 MachineInstr *&OtherMI) {
1552 unsigned Opcode = MI.getOpcode();
1553 assert(OtherMI && "OtherMI shouldn't be empty.");
1554
1555 Register DestDivReg, DestRemReg;
1556 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1557 DestDivReg = MI.getOperand(0).getReg();
1558 DestRemReg = OtherMI->getOperand(0).getReg();
1559 } else {
1560 DestDivReg = OtherMI->getOperand(0).getReg();
1561 DestRemReg = MI.getOperand(0).getReg();
1562 }
1563
1564 bool IsSigned =
1565 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1566
1567 // Check which instruction is first in the block so we don't break def-use
1568 // deps by "moving" the instruction incorrectly. Also keep track of which
1569 // instruction is first so we pick its operands, avoiding use-before-def
1570 // bugs.
1571 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1572 Builder.setInstrAndDebugLoc(*FirstInst);
1573
1574 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1575 : TargetOpcode::G_UDIVREM,
1576 {DestDivReg, DestRemReg},
1577 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1578 MI.eraseFromParent();
1579 OtherMI->eraseFromParent();
1580}
1581
1582bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
1583 MachineInstr *&BrCond) {
1584 assert(MI.getOpcode() == TargetOpcode::G_BR);
1585
1586 // Try to match the following:
1587 // bb1:
1588 // G_BRCOND %c1, %bb2
1589 // G_BR %bb3
1590 // bb2:
1591 // ...
1592 // bb3:
1593
1594 // The above pattern does not have a fall through to the successor bb2, always
1595 // resulting in a branch no matter which path is taken. Here we try to find
1596 // and replace that pattern with a conditional branch to bb3 and otherwise
1597 // fallthrough to bb2. This is generally better for branch predictors.
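// Sketch of the rewritten block (editorial, not from the original source):
// bb1:
//   %not_c1 = G_XOR %c1, <icmp-true-value>   ; invert the condition
//   G_BRCOND %not_c1, %bb3
//   ; fallthrough into bb2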
1598
1599 MachineBasicBlock *MBB = MI.getParent();
1600 MachineBasicBlock::iterator BrIt(MI);
1601 if (BrIt == MBB->begin())
1602 return false;
1603 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1604
1605 BrCond = &*std::prev(BrIt);
1606 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1607 return false;
1608
1609 // Check that the next block is the conditional branch target. Also make sure
1610 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1611 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1612 return BrCondTarget != MI.getOperand(0).getMBB() &&
1613 MBB->isLayoutSuccessor(BrCondTarget);
1614}
1615
1616void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
1617 MachineInstr *&BrCond) {
1618 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1619 Builder.setInstrAndDebugLoc(*BrCond);
1620 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1621 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1622 // this to i1 only since we might not know for sure what kind of
1623 // compare generated the condition value.
1624 auto True = Builder.buildConstant(
1625 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1626 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1627
1628 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1629 Observer.changingInstr(MI);
1630 MI.getOperand(0).setMBB(FallthroughBB);
1631 Observer.changedInstr(MI);
1632
1633 // Change the conditional branch to use the inverted condition and
1634 // new target block.
1635 Observer.changingInstr(*BrCond);
1636 BrCond->getOperand(0).setReg(Xor.getReg(0));
1637 BrCond->getOperand(1).setMBB(BrTarget);
1638 Observer.changedInstr(*BrCond);
1639}
1640
1641
1642bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
1643 MachineIRBuilder HelperBuilder(MI);
1644 GISelObserverWrapper DummyObserver;
1645 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1646 return Helper.lowerMemcpyInline(MI) ==
1647 LegalizerHelper::LegalizeResult::Legalized;
1648}
1649
1650bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1651 MachineIRBuilder HelperBuilder(MI);
1652 GISelObserverWrapper DummyObserver;
1653 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1654 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1655 LegalizerHelper::LegalizeResult::Legalized;
1656}
1657
1658static APFloat constantFoldFpUnary(const MachineInstr &MI,
1659 const MachineRegisterInfo &MRI,
1660 const APFloat &Val) {
1661 APFloat Result(Val);
1662 switch (MI.getOpcode()) {
1663 default:
1664 llvm_unreachable("Unexpected opcode!");
1665 case TargetOpcode::G_FNEG: {
1666 Result.changeSign();
1667 return Result;
1668 }
1669 case TargetOpcode::G_FABS: {
1670 Result.clearSign();
1671 return Result;
1672 }
1673 case TargetOpcode::G_FPTRUNC: {
1674 bool Unused;
1675 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1676 Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
1677 &Unused);
1678 return Result;
1679 }
1680 case TargetOpcode::G_FSQRT: {
1681 bool Unused;
1682 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1683 &Unused);
1684 Result = APFloat(sqrt(Result.convertToDouble()));
1685 break;
1686 }
1687 case TargetOpcode::G_FLOG2: {
1688 bool Unused;
1689 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1690 &Unused);
1691 Result = APFloat(log2(Result.convertToDouble()));
1692 break;
1693 }
1694 }
1695 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1696 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and
1697 // `G_FLOG2` reach here.
1698 bool Unused;
1699 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1700 return Result;
1701}
1702
1703void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1704 const ConstantFP *Cst) {
1705 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1706 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1707 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1708 MI.eraseFromParent();
1709}
1710
1711bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1712 PtrAddChain &MatchInfo) {
1713 // We're trying to match the following pattern:
1714 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1715 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1716 // -->
1717 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1718
1719 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1720 return false;
1721
1722 Register Add2 = MI.getOperand(1).getReg();
1723 Register Imm1 = MI.getOperand(2).getReg();
1724 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1725 if (!MaybeImmVal)
1726 return false;
1727
1728 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1729 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1730 return false;
1731
1732 Register Base = Add2Def->getOperand(1).getReg();
1733 Register Imm2 = Add2Def->getOperand(2).getReg();
1734 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1735 if (!MaybeImm2Val)
1736 return false;
1737
1738 // Check if the new combined immediate forms an illegal addressing mode.
1739 // Do not combine if it was legal before but would get illegal.
1740 // To do so, we need to find a load/store user of the pointer to get
1741 // the access type.
1742 Type *AccessTy = nullptr;
1743 auto &MF = *MI.getMF();
1744 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1745 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1746 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1747 MF.getFunction().getContext());
1748 break;
1749 }
1750 }
1751 TargetLoweringBase::AddrMode AMNew;
1752 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1753 AMNew.BaseOffs = CombinedImm.getSExtValue();
1754 if (AccessTy) {
1755 AMNew.HasBaseReg = true;
1756 TargetLoweringBase::AddrMode AMOld;
1757 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1758 AMOld.HasBaseReg = true;
1759 unsigned AS = MRI.getType(Add2).getAddressSpace();
1760 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1761 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1762 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1763 return false;
1764 }
1765
1766 // Pass the combined immediate to the apply function.
1767 MatchInfo.Imm = AMNew.BaseOffs;
1768 MatchInfo.Base = Base;
1769 MatchInfo.Bank = getRegBank(Imm2);
1770 return true;
1771}
1772
1773 void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1774 PtrAddChain &MatchInfo) {
1775 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1776 MachineIRBuilder MIB(MI);
1777 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1778 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1779 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1781 MI.getOperand(1).setReg(MatchInfo.Base);
1782 MI.getOperand(2).setReg(NewOffset.getReg(0));
1784}
1785
1786 bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1787 RegisterImmPair &MatchInfo) {
1788 // We're trying to match the following pattern with any of
1789 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1790 // %t1 = SHIFT %base, G_CONSTANT imm1
1791 // %root = SHIFT %t1, G_CONSTANT imm2
1792 // -->
1793 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1794
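// Illustrative example (hypothetical amounts): on s32,
// %t1 = G_SHL %base, G_CONSTANT 3
// %root = G_SHL %t1, G_CONSTANT 4
// becomes
// %root = G_SHL %base, G_CONSTANT 7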
1795 unsigned Opcode = MI.getOpcode();
1796 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1797 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1798 Opcode == TargetOpcode::G_USHLSAT) &&
1799 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1800
1801 Register Shl2 = MI.getOperand(1).getReg();
1802 Register Imm1 = MI.getOperand(2).getReg();
1803 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1804 if (!MaybeImmVal)
1805 return false;
1806
1807 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1808 if (Shl2Def->getOpcode() != Opcode)
1809 return false;
1810
1811 Register Base = Shl2Def->getOperand(1).getReg();
1812 Register Imm2 = Shl2Def->getOperand(2).getReg();
1813 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1814 if (!MaybeImm2Val)
1815 return false;
1816
1817 // Pass the combined immediate to the apply function.
1818 MatchInfo.Imm =
1819 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1820 MatchInfo.Reg = Base;
1821
1822 // There is no simple replacement for a saturating unsigned left shift that
1823 // exceeds the scalar size.
1824 if (Opcode == TargetOpcode::G_USHLSAT &&
1825 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1826 return false;
1827
1828 return true;
1829}
1830
1831 void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1832 RegisterImmPair &MatchInfo) {
1833 unsigned Opcode = MI.getOpcode();
1834 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1835 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1836 Opcode == TargetOpcode::G_USHLSAT) &&
1837 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1838
1839 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1840 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1841 auto Imm = MatchInfo.Imm;
1842
1843 if (Imm >= ScalarSizeInBits) {
1844 // Any logical shift that exceeds scalar size will produce zero.
1845 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1846 Builder.buildConstant(MI.getOperand(0), 0);
1847 MI.eraseFromParent();
1848 return;
1849 }
1850 // Arithmetic shift and saturating signed left shift have no effect beyond
1851 // scalar size.
1852 Imm = ScalarSizeInBits - 1;
1853 }
1854
1855 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1856 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1858 MI.getOperand(1).setReg(MatchInfo.Reg);
1859 MI.getOperand(2).setReg(NewImm);
1861}
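// Illustrative example (hypothetical amounts): on s32, a combined logical
// shift of 20 + 20 = 40 exceeds the scalar size, so the apply above folds
// the whole chain to the constant 0; for G_ASHR the amount is clamped to 31
// instead, which preserves the sign-replication behaviour.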
1862
1863 bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1864 ShiftOfShiftedLogic &MatchInfo) {
1865 // We're trying to match the following pattern with any of
1866 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1867 // with any of G_AND/G_OR/G_XOR logic instructions.
1868 // %t1 = SHIFT %X, G_CONSTANT C0
1869 // %t2 = LOGIC %t1, %Y
1870 // %root = SHIFT %t2, G_CONSTANT C1
1871 // -->
1872 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1873 // %t4 = SHIFT %Y, G_CONSTANT C1
1874 // %root = LOGIC %t3, %t4
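// Illustrative example (hypothetical constants): with C0 = 2 and C1 = 3,
// %t1 = G_SHL %X, G_CONSTANT 2
// %t2 = G_AND %t1, %Y
// %root = G_SHL %t2, G_CONSTANT 3
// becomes
// %t3 = G_SHL %X, G_CONSTANT 5
// %t4 = G_SHL %Y, G_CONSTANT 3
// %root = G_AND %t3, %t4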
1875 unsigned ShiftOpcode = MI.getOpcode();
1876 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1877 ShiftOpcode == TargetOpcode::G_ASHR ||
1878 ShiftOpcode == TargetOpcode::G_LSHR ||
1879 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1880 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1881 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1882
1883 // Match a one-use bitwise logic op.
1884 Register LogicDest = MI.getOperand(1).getReg();
1885 if (!MRI.hasOneNonDBGUse(LogicDest))
1886 return false;
1887
1888 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1889 unsigned LogicOpcode = LogicMI->getOpcode();
1890 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1891 LogicOpcode != TargetOpcode::G_XOR)
1892 return false;
1893
1894 // Find a matching one-use shift by constant.
1895 const Register C1 = MI.getOperand(2).getReg();
1896 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1897 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1898 return false;
1899
1900 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1901
1902 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1903 // The shift must match the previous one and have only one use.
1904 if (MI->getOpcode() != ShiftOpcode ||
1905 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1906 return false;
1907
1908 // Must be a constant.
1909 auto MaybeImmVal =
1910 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1911 if (!MaybeImmVal)
1912 return false;
1913
1914 ShiftVal = MaybeImmVal->Value.getSExtValue();
1915 return true;
1916 };
1917
1918 // Logic ops are commutative, so check each operand for a match.
1919 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1920 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1921 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1922 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1923 uint64_t C0Val;
1924
1925 if (matchFirstShift(LogicMIOp1, C0Val)) {
1926 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1927 MatchInfo.Shift2 = LogicMIOp1;
1928 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1929 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1930 MatchInfo.Shift2 = LogicMIOp2;
1931 } else
1932 return false;
1933
1934 MatchInfo.ValSum = C0Val + C1Val;
1935
1936 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1937 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1938 return false;
1939
1940 MatchInfo.Logic = LogicMI;
1941 return true;
1942}
1943
1944 void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1945 ShiftOfShiftedLogic &MatchInfo) {
1946 unsigned Opcode = MI.getOpcode();
1947 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1948 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1949 Opcode == TargetOpcode::G_SSHLSAT) &&
1950 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1951
1952 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1953 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1954
1955 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1956
1957 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1958 Register Shift1 =
1959 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1960
1961 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
1962 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
1963 // old shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at
1964 // the end would actually remove the old shift1 and cause a crash later.
1965 // Erase it earlier to avoid the crash.
1966 MatchInfo.Shift2->eraseFromParent();
1967
1968 Register Shift2Const = MI.getOperand(2).getReg();
1969 Register Shift2 = Builder
1970 .buildInstr(Opcode, {DestType},
1971 {MatchInfo.LogicNonShiftReg, Shift2Const})
1972 .getReg(0);
1973
1974 Register Dest = MI.getOperand(0).getReg();
1975 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1976
1977 // This was one use so it's safe to remove it.
1978 MatchInfo.Logic->eraseFromParent();
1979
1980 MI.eraseFromParent();
1981}
1982
1983 bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
1984 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1985 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1986 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
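// Illustrative example (hypothetical constants): with c1 = 1 and c2 = 4,
// (shl (add x, 1), 4) becomes (add (shl x, 4), 16), exposing the shifted
// constant 1 << 4 to further folding.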
1987 auto &Shl = cast<GenericMachineInstr>(MI);
1988 Register DstReg = Shl.getReg(0);
1989 Register SrcReg = Shl.getReg(1);
1990 Register ShiftReg = Shl.getReg(2);
1991 Register X, C1;
1992
1993 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1994 return false;
1995
1996 if (!mi_match(SrcReg, MRI,
1997 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1998 m_GOr(m_Reg(X), m_Reg(C1))))))
1999 return false;
2000
2001 APInt C1Val, C2Val;
2002 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2003 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2004 return false;
2005
2006 auto *SrcDef = MRI.getVRegDef(SrcReg);
2007 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2008 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2009 LLT SrcTy = MRI.getType(SrcReg);
2010 MatchInfo = [=](MachineIRBuilder &B) {
2011 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2012 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2013 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2014 };
2015 return true;
2016}
2017
2018 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
2019 unsigned &ShiftVal) {
2020 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2021 auto MaybeImmVal =
2022 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2023 if (!MaybeImmVal)
2024 return false;
2025
2026 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2027 return (static_cast<int32_t>(ShiftVal) != -1);
2028}
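// Illustrative example (hypothetical constant): G_MUL %x, 8 has
// exactLogBase2() == 3, so the apply below rewrites it as G_SHL %x, 3.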
2029
2030 void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
2031 unsigned &ShiftVal) {
2032 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2033 MachineIRBuilder MIB(MI);
2034 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2035 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2037 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2038 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2040}
2041
2042// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
2043 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
2044 RegisterImmPair &MatchData) {
2045 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
2046 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2047 return false;
2048
2049 Register LHS = MI.getOperand(1).getReg();
2050
2051 Register ExtSrc;
2052 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2053 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2054 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2055 return false;
2056
2057 Register RHS = MI.getOperand(2).getReg();
2058 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2059 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2060 if (!MaybeShiftAmtVal)
2061 return false;
2062
2063 if (LI) {
2064 LLT SrcTy = MRI.getType(ExtSrc);
2065
2066 // We only really care about the legality of the shifted value. We can
2067 // pick any type for the constant shift amount, so ask the target what to
2068 // use. Otherwise we would have to guess and hope it is reported as legal.
2069 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2070 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2071 return false;
2072 }
2073
2074 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2075 MatchData.Reg = ExtSrc;
2076 MatchData.Imm = ShiftAmt;
2077
2078 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
2079 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2080 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2081}
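// Illustrative example (hypothetical types): for %ext:_(s64) = G_ZEXT %x:_(s32)
// and %shl:_(s64) = G_SHL %ext, 2, the shift fits in the 32-bit source when
// the known leading zeros of %x cover the shift amount, so the apply below
// emits a narrow s32 shift followed by a G_ZEXT to s64.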
2082
2084 const RegisterImmPair &MatchData) {
2085 Register ExtSrcReg = MatchData.Reg;
2086 int64_t ShiftAmtVal = MatchData.Imm;
2087
2088 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2089 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2090 auto NarrowShift =
2091 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2092 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2093 MI.eraseFromParent();
2094}
2095
2096 bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2097 Register &MatchInfo) {
2098 GMerge &Merge = cast<GMerge>(MI);
2099 SmallVector<Register, 16> MergedValues;
2100 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2101 MergedValues.emplace_back(Merge.getSourceReg(I));
2102
2103 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2104 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2105 return false;
2106
2107 for (unsigned I = 0; I < MergedValues.size(); ++I)
2108 if (MergedValues[I] != Unmerge->getReg(I))
2109 return false;
2110
2111 MatchInfo = Unmerge->getSourceReg();
2112 return true;
2113}
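// Illustrative example: for
// %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %x:_(s64)
// %y:_(s64) = G_MERGE_VALUES %a, %b
// the merge re-assembles the original value, so %y can simply be replaced
// with %x.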
2114
2116 const MachineRegisterInfo &MRI) {
2117 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2118 ;
2119
2120 return Reg;
2121}
2122
2125 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2126 "Expected an unmerge");
2127 auto &Unmerge = cast<GUnmerge>(MI);
2128 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2129
2130 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2131 if (!SrcInstr)
2132 return false;
2133
2134 // Check the source type of the merge.
2135 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2136 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2137 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2138 if (SrcMergeTy != Dst0Ty && !SameSize)
2139 return false;
2140 // They are the same now (modulo a bitcast).
2141 // We can collect all the src registers.
2142 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2143 Operands.push_back(SrcInstr->getSourceReg(Idx));
2144 return true;
2145}
2146
2149 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2150 "Expected an unmerge");
2151 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2152 "Not enough operands to replace all defs");
2153 unsigned NumElems = MI.getNumOperands() - 1;
2154
2155 LLT SrcTy = MRI.getType(Operands[0]);
2156 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2157 bool CanReuseInputDirectly = DstTy == SrcTy;
2158 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2159 Register DstReg = MI.getOperand(Idx).getReg();
2160 Register SrcReg = Operands[Idx];
2161
2162 // This combine may run after RegBankSelect, so we need to be aware of
2163 // register banks.
2164 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2165 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2166 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2167 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2168 }
2169
2170 if (CanReuseInputDirectly)
2171 replaceRegWith(MRI, DstReg, SrcReg);
2172 else
2173 Builder.buildCast(DstReg, SrcReg);
2174 }
2175 MI.eraseFromParent();
2176}
2177
2179 SmallVectorImpl<APInt> &Csts) {
2180 unsigned SrcIdx = MI.getNumOperands() - 1;
2181 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2182 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2183 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2184 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2185 return false;
2186 // Break down the big constant into smaller ones.
2187 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2188 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2189 ? CstVal.getCImm()->getValue()
2190 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2191
2192 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2193 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2194 // Unmerge a constant.
2195 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2196 Csts.emplace_back(Val.trunc(ShiftAmt));
2197 Val = Val.lshr(ShiftAmt);
2198 }
2199
2200 return true;
2201}
2202
2204 SmallVectorImpl<APInt> &Csts) {
2205 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2206 "Expected an unmerge");
2207 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2208 "Not enough operands to replace all defs");
2209 unsigned NumElems = MI.getNumOperands() - 1;
2210 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2211 Register DstReg = MI.getOperand(Idx).getReg();
2212 Builder.buildConstant(DstReg, Csts[Idx]);
2213 }
2214
2215 MI.eraseFromParent();
2216}
2217
2219 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2220 unsigned SrcIdx = MI.getNumOperands() - 1;
2221 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2222 MatchInfo = [&MI](MachineIRBuilder &B) {
2223 unsigned NumElems = MI.getNumOperands() - 1;
2224 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2225 Register DstReg = MI.getOperand(Idx).getReg();
2226 B.buildUndef(DstReg);
2227 }
2228 };
2229 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2230}
2231
2233 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2234 "Expected an unmerge");
2235 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2236 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2237 return false;
2238 // Check that all the lanes are dead except the first one.
2239 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2240 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2241 return false;
2242 }
2243 return true;
2244}
2245
2247 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2248 Register Dst0Reg = MI.getOperand(0).getReg();
2249 Builder.buildTrunc(Dst0Reg, SrcReg);
2250 MI.eraseFromParent();
2251}
2252
2254 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2255 "Expected an unmerge");
2256 Register Dst0Reg = MI.getOperand(0).getReg();
2257 LLT Dst0Ty = MRI.getType(Dst0Reg);
2258 // G_ZEXT on vector applies to each lane, so it will
2259 // affect all destinations. Therefore we won't be able
2260 // to simplify the unmerge to just the first definition.
2261 if (Dst0Ty.isVector())
2262 return false;
2263 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2264 LLT SrcTy = MRI.getType(SrcReg);
2265 if (SrcTy.isVector())
2266 return false;
2267
2268 Register ZExtSrcReg;
2269 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2270 return false;
2271
2272 // Finally, we can replace the first definition with a zext of the
2273 // source if the definition is big enough to hold all of ZExtSrc's
2274 // bits.
2275 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2276 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2277}
2278
2280 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2281 "Expected an unmerge");
2282
2283 Register Dst0Reg = MI.getOperand(0).getReg();
2284
2285 MachineInstr *ZExtInstr =
2286 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2287 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2288 "Expecting a G_ZEXT");
2289
2290 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2291 LLT Dst0Ty = MRI.getType(Dst0Reg);
2292 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2293
2294 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2295 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2296 } else {
2297 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2298 "ZExt src doesn't fit in destination");
2299 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2300 }
2301
2302 Register ZeroReg;
2303 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2304 if (!ZeroReg)
2305 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2306 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2307 }
2308 MI.eraseFromParent();
2309}
2310
2311 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2312 unsigned TargetShiftSize,
2313 unsigned &ShiftVal) {
2314 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2315 MI.getOpcode() == TargetOpcode::G_LSHR ||
2316 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2317
2318 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2319 if (Ty.isVector()) // TODO: Handle vector types.
2320 return false;
2321
2322 // Don't narrow further than the requested size.
2323 unsigned Size = Ty.getSizeInBits();
2324 if (Size <= TargetShiftSize)
2325 return false;
2326
2327 auto MaybeImmVal =
2328 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2329 if (!MaybeImmVal)
2330 return false;
2331
2332 ShiftVal = MaybeImmVal->Value.getSExtValue();
2333 return ShiftVal >= Size / 2 && ShiftVal < Size;
2334}
2335
2336 void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2337 const unsigned &ShiftVal) {
2338 Register DstReg = MI.getOperand(0).getReg();
2339 Register SrcReg = MI.getOperand(1).getReg();
2340 LLT Ty = MRI.getType(SrcReg);
2341 unsigned Size = Ty.getSizeInBits();
2342 unsigned HalfSize = Size / 2;
2343 assert(ShiftVal >= HalfSize);
2344
2345 LLT HalfTy = LLT::scalar(HalfSize);
2346
2347 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2348 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2349
2350 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2351 Register Narrowed = Unmerge.getReg(1);
2352
2353 // dst = G_LSHR s64:x, C for C >= 32
2354 // =>
2355 // lo, hi = G_UNMERGE_VALUES x
2356 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2357
2358 if (NarrowShiftAmt != 0) {
2359 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2360 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2361 }
2362
2363 auto Zero = Builder.buildConstant(HalfTy, 0);
2364 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2365 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2366 Register Narrowed = Unmerge.getReg(0);
2367 // dst = G_SHL s64:x, C for C >= 32
2368 // =>
2369 // lo, hi = G_UNMERGE_VALUES x
2370 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2371 if (NarrowShiftAmt != 0) {
2372 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2373 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2374 }
2375
2376 auto Zero = Builder.buildConstant(HalfTy, 0);
2377 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2378 } else {
2379 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2380 auto Hi = Builder.buildAShr(
2381 HalfTy, Unmerge.getReg(1),
2382 Builder.buildConstant(HalfTy, HalfSize - 1));
2383
2384 if (ShiftVal == HalfSize) {
2385 // (G_ASHR i64:x, 32) ->
2386 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2387 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2388 } else if (ShiftVal == Size - 1) {
2389 // Don't need a second shift.
2390 // (G_ASHR i64:x, 63) ->
2391 // %narrowed = (G_ASHR hi_32(x), 31)
2392 // G_MERGE_VALUES %narrowed, %narrowed
2393 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2394 } else {
2395 auto Lo = Builder.buildAShr(
2396 HalfTy, Unmerge.getReg(1),
2397 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2398
2399 // (G_ASHR i64:x, C) ->, for C >= 32
2400 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2401 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2402 }
2403 }
2404
2405 MI.eraseFromParent();
2406}
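// Illustrative example (hypothetical amount): dst = G_LSHR s64:x, 40 becomes
// lo, hi = G_UNMERGE_VALUES x
// dst = G_MERGE_VALUES (G_LSHR hi, 8), 0
// i.e. only a 32-bit shift of the high half is needed.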
2407
2409 unsigned TargetShiftAmount) {
2410 unsigned ShiftAmt;
2411 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2412 applyCombineShiftToUnmerge(MI, ShiftAmt);
2413 return true;
2414 }
2415
2416 return false;
2417}
2418
2420 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2421 Register DstReg = MI.getOperand(0).getReg();
2422 LLT DstTy = MRI.getType(DstReg);
2423 Register SrcReg = MI.getOperand(1).getReg();
2424 return mi_match(SrcReg, MRI,
2425 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2426}
2427
2429 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2430 Register DstReg = MI.getOperand(0).getReg();
2431 Builder.buildCopy(DstReg, Reg);
2432 MI.eraseFromParent();
2433}
2434
2436 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2437 Register DstReg = MI.getOperand(0).getReg();
2438 Builder.buildZExtOrTrunc(DstReg, Reg);
2439 MI.eraseFromParent();
2440}
2441
2443 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2444 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2445 Register LHS = MI.getOperand(1).getReg();
2446 Register RHS = MI.getOperand(2).getReg();
2447 LLT IntTy = MRI.getType(LHS);
2448
2449 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2450 // instruction.
2451 PtrReg.second = false;
2452 for (Register SrcReg : {LHS, RHS}) {
2453 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2454 // Don't handle cases where the integer is implicitly converted to the
2455 // pointer width.
2456 LLT PtrTy = MRI.getType(PtrReg.first);
2457 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2458 return true;
2459 }
2460
2461 PtrReg.second = true;
2462 }
2463
2464 return false;
2465}
2466
2468 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2469 Register Dst = MI.getOperand(0).getReg();
2470 Register LHS = MI.getOperand(1).getReg();
2471 Register RHS = MI.getOperand(2).getReg();
2472
2473 const bool DoCommute = PtrReg.second;
2474 if (DoCommute)
2475 std::swap(LHS, RHS);
2476 LHS = PtrReg.first;
2477
2478 LLT PtrTy = MRI.getType(LHS);
2479
2480 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2481 Builder.buildPtrToInt(Dst, PtrAdd);
2482 MI.eraseFromParent();
2483}
2484
2485 bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2486 APInt &NewCst) {
2487 auto &PtrAdd = cast<GPtrAdd>(MI);
2488 Register LHS = PtrAdd.getBaseReg();
2489 Register RHS = PtrAdd.getOffsetReg();
2491
2492 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2493 APInt Cst;
2494 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2495 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2496 // G_INTTOPTR uses zero-extension
2497 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2498 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2499 return true;
2500 }
2501 }
2502
2503 return false;
2504}
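// Illustrative example (hypothetical constants):
// %p:_(p0) = G_INTTOPTR 0x1000
// %q:_(p0) = G_PTR_ADD %p, 16
// folds to the constant pointer value 0x1010 via the apply below.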
2505
2507 APInt &NewCst) {
2508 auto &PtrAdd = cast<GPtrAdd>(MI);
2509 Register Dst = PtrAdd.getReg(0);
2510
2511 Builder.buildConstant(Dst, NewCst);
2512 PtrAdd.eraseFromParent();
2513}
2514
2516 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2517 Register DstReg = MI.getOperand(0).getReg();
2518 Register SrcReg = MI.getOperand(1).getReg();
2519 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2520 if (OriginalSrcReg.isValid())
2521 SrcReg = OriginalSrcReg;
2522 LLT DstTy = MRI.getType(DstReg);
2523 return mi_match(SrcReg, MRI,
2524 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2525}
2526
2528 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2529 Register DstReg = MI.getOperand(0).getReg();
2530 Register SrcReg = MI.getOperand(1).getReg();
2531 LLT DstTy = MRI.getType(DstReg);
2532 if (mi_match(SrcReg, MRI,
2533 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2534 unsigned DstSize = DstTy.getScalarSizeInBits();
2535 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2536 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2537 }
2538 return false;
2539}
2540
2542 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2543 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2544
2545 // ShiftTy > 32 > TruncTy -> 32
2546 if (ShiftSize > 32 && TruncSize < 32)
2547 return ShiftTy.changeElementSize(32);
2548
2549 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2550 // Some targets like it, some don't, some only like it under certain
2551 // conditions/processor versions, etc.
2552 // A TargetLowering hook might be needed for this.
2553
2554 // Don't combine
2555 return ShiftTy;
2556}
2557
2559 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2560 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2561 Register DstReg = MI.getOperand(0).getReg();
2562 Register SrcReg = MI.getOperand(1).getReg();
2563
2564 if (!MRI.hasOneNonDBGUse(SrcReg))
2565 return false;
2566
2567 LLT SrcTy = MRI.getType(SrcReg);
2568 LLT DstTy = MRI.getType(DstReg);
2569
2570 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2571 const auto &TL = getTargetLowering();
2572
2573 LLT NewShiftTy;
2574 switch (SrcMI->getOpcode()) {
2575 default:
2576 return false;
2577 case TargetOpcode::G_SHL: {
2578 NewShiftTy = DstTy;
2579
2580 // Make sure new shift amount is legal.
2581 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2582 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2583 return false;
2584 break;
2585 }
2586 case TargetOpcode::G_LSHR:
2587 case TargetOpcode::G_ASHR: {
2588 // For right shifts, we conservatively do not do the transform if the TRUNC
2589 // has any STORE users. The reason is that if we change the type of the
2590 // shift, we may break the truncstore combine.
2591 //
2592 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2593 for (auto &User : MRI.use_instructions(DstReg))
2594 if (User.getOpcode() == TargetOpcode::G_STORE)
2595 return false;
2596
2597 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2598 if (NewShiftTy == SrcTy)
2599 return false;
2600
2601 // Make sure we won't lose information by truncating the high bits.
2602 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2603 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2604 DstTy.getScalarSizeInBits()))
2605 return false;
2606 break;
2607 }
2608 }
2609
2610 if (!isLegalOrBeforeLegalizer(
2611 {SrcMI->getOpcode(),
2612 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2613 return false;
2614
2615 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2616 return true;
2617}
2618
2620 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2621 MachineInstr *ShiftMI = MatchInfo.first;
2622 LLT NewShiftTy = MatchInfo.second;
2623
2624 Register Dst = MI.getOperand(0).getReg();
2625 LLT DstTy = MRI.getType(Dst);
2626
2627 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2628 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2629 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2630
2631 Register NewShift =
2632 Builder
2633 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2634 .getReg(0);
2635
2636 if (NewShiftTy == DstTy)
2637 replaceRegWith(MRI, Dst, NewShift);
2638 else
2639 Builder.buildTrunc(Dst, NewShift);
2640
2641 eraseInst(MI);
2642}
2643
2645 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2646 return MO.isReg() &&
2647 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2648 });
2649}
2650
2652 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2653 return !MO.isReg() ||
2654 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2655 });
2656}
2657
2659 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2660 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2661 return all_of(Mask, [](int Elt) { return Elt < 0; });
2662}
2663
2665 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2666 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2667 MRI);
2668}
2669
2671 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2672 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2673 MRI);
2674}
2675
2677 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2678 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2679 "Expected an insert/extract element op");
2680 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2681 unsigned IdxIdx =
2682 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2683 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2684 if (!Idx)
2685 return false;
2686 return Idx->getZExtValue() >= VecTy.getNumElements();
2687}
2688
2689 bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2690 GSelect &SelMI = cast<GSelect>(MI);
2691 auto Cst =
2692 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2693 if (!Cst)
2694 return false;
2695 OpIdx = Cst->isZero() ? 3 : 2;
2696 return true;
2697}
2698
2699void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2700
2702 const MachineOperand &MOP2) {
2703 if (!MOP1.isReg() || !MOP2.isReg())
2704 return false;
2705 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2706 if (!InstAndDef1)
2707 return false;
2708 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2709 if (!InstAndDef2)
2710 return false;
2711 MachineInstr *I1 = InstAndDef1->MI;
2712 MachineInstr *I2 = InstAndDef2->MI;
2713
2714 // Handle a case like this:
2715 //
2716 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2717 //
2718 // Even though %0 and %1 are produced by the same instruction they are not
2719 // the same values.
2720 if (I1 == I2)
2721 return MOP1.getReg() == MOP2.getReg();
2722
2723 // If we have an instruction which loads or stores, we can't guarantee that
2724 // it is identical.
2725 //
2726 // For example, we may have
2727 //
2728 // %x1 = G_LOAD %addr (load N from @somewhere)
2729 // ...
2730 // call @foo
2731 // ...
2732 // %x2 = G_LOAD %addr (load N from @somewhere)
2733 // ...
2734 // %or = G_OR %x1, %x2
2735 //
2736 // It's possible that @foo will modify whatever lives at the address we're
2737 // loading from. To be safe, let's just assume that all loads and stores
2738 // are different (unless we have something which is guaranteed to not
2739 // change.)
2740 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2741 return false;
2742
2743 // If both instructions are loads or stores, they are equal only if both
2744 // are dereferenceable invariant loads with the same number of bits.
2745 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2746 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2747 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2748 if (!LS1 || !LS2)
2749 return false;
2750
2751 if (!I2->isDereferenceableInvariantLoad() ||
2752 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2753 return false;
2754 }
2755
2756 // Check for physical registers on the instructions first to avoid cases
2757 // like this:
2758 //
2759 // %a = COPY $physreg
2760 // ...
2761 // SOMETHING implicit-def $physreg
2762 // ...
2763 // %b = COPY $physreg
2764 //
2765 // These copies are not equivalent.
2766 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2767 return MO.isReg() && MO.getReg().isPhysical();
2768 })) {
2769 // Check if we have a case like this:
2770 //
2771 // %a = COPY $physreg
2772 // %b = COPY %a
2773 //
2774 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2775 // From that, we know that they must have the same value, since they must
2776 // have come from the same COPY.
2777 return I1->isIdenticalTo(*I2);
2778 }
2779
2780 // We don't have any physical registers, so we don't necessarily need the
2781 // same vreg defs.
2782 //
2783 // On the off-chance that there's some target instruction feeding into the
2784 // instruction, let's use produceSameValue instead of isIdenticalTo.
2785 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2786 // Handle instructions with multiple defs that produce the same values. The
2787 // values are the same for operands with the same index.
2788 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2789 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2790 // I1 and I2 are different instructions that produce the same values;
2791 // %1 and %6 are the same, but %1 and %7 are not.
2792 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2793 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2794 }
2795 return false;
2796}
2797
2799 if (!MOP.isReg())
2800 return false;
2801 auto *MI = MRI.getVRegDef(MOP.getReg());
2802 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2803 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2804 MaybeCst->getSExtValue() == C;
2805}
2806
2808 if (!MOP.isReg())
2809 return false;
2810 std::optional<FPValueAndVReg> MaybeCst;
2811 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2812 return false;
2813
2814 return MaybeCst->Value.isExactlyValue(C);
2815}
2816
2818 unsigned OpIdx) {
2819 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2820 Register OldReg = MI.getOperand(0).getReg();
2821 Register Replacement = MI.getOperand(OpIdx).getReg();
2822 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2823 MI.eraseFromParent();
2824 replaceRegWith(MRI, OldReg, Replacement);
2825}
2826
2828 Register Replacement) {
2829 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2830 Register OldReg = MI.getOperand(0).getReg();
2831 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2832 MI.eraseFromParent();
2833 replaceRegWith(MRI, OldReg, Replacement);
2834}
2835
2837 unsigned ConstIdx) {
2838 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2839 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2840
2841 // Get the shift amount
2842 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2843 if (!VRegAndVal)
2844 return false;
2845
2846 // Return true if the shift amount is >= the bitwidth.
2847 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2848}
2849
2851 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2852 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2853 "This is not a funnel shift operation");
2854
2855 Register ConstReg = MI.getOperand(3).getReg();
2856 LLT ConstTy = MRI.getType(ConstReg);
2857 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2858
2859 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2860 assert((VRegAndVal) && "Value is not a constant");
2861
2862 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
2863 APInt NewConst = VRegAndVal->Value.urem(
2864 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2865
2866 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2867 Builder.buildInstr(
2868 MI.getOpcode(), {MI.getOperand(0)},
2869 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2870
2871 MI.eraseFromParent();
2872}
2873
2875 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2876 // Match (cond ? x : x)
2877 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2878 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2879 MRI);
2880}
2881
2883 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2884 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2885 MRI);
2886}
2887
2889 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2890 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2891 MRI);
2892}
2893
2895 MachineOperand &MO = MI.getOperand(OpIdx);
2896 return MO.isReg() &&
2897 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2898}
2899
2901 unsigned OpIdx) {
2902 MachineOperand &MO = MI.getOperand(OpIdx);
2903 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2904}
2905
2907 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2908 Builder.buildFConstant(MI.getOperand(0), C);
2909 MI.eraseFromParent();
2910}
2911
2913 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2914 Builder.buildConstant(MI.getOperand(0), C);
2915 MI.eraseFromParent();
2916}
2917
2919 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2920 Builder.buildConstant(MI.getOperand(0), C);
2921 MI.eraseFromParent();
2922}
2923
2925 ConstantFP *CFP) {
2926 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2927 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
2928 MI.eraseFromParent();
2929}
2930
2932 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2933 Builder.buildUndef(MI.getOperand(0));
2934 MI.eraseFromParent();
2935}
2936
2938 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2939 Register LHS = MI.getOperand(1).getReg();
2940 Register RHS = MI.getOperand(2).getReg();
2941 Register &NewLHS = std::get<0>(MatchInfo);
2942 Register &NewRHS = std::get<1>(MatchInfo);
2943
2944 // Helper lambda to check for opportunities for
2945 // ((0-A) + B) -> B - A
2946 // (A + (0-B)) -> A - B
2947 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2948 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2949 return false;
2950 NewLHS = MaybeNewLHS;
2951 return true;
2952 };
2953
2954 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2955}
2956
2959 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2960 "Invalid opcode");
2961 Register DstReg = MI.getOperand(0).getReg();
2962 LLT DstTy = MRI.getType(DstReg);
2963 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2964 unsigned NumElts = DstTy.getNumElements();
2965 // If this MI is part of a sequence of insert_vec_elts, then
2966 // don't do the combine in the middle of the sequence.
2967 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2968 TargetOpcode::G_INSERT_VECTOR_ELT)
2969 return false;
2970 MachineInstr *CurrInst = &MI;
2971 MachineInstr *TmpInst;
2972 int64_t IntImm;
2973 Register TmpReg;
2974 MatchInfo.resize(NumElts);
2975 while (mi_match(
2976 CurrInst->getOperand(0).getReg(), MRI,
2977 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2978 if (IntImm >= NumElts || IntImm < 0)
2979 return false;
2980 if (!MatchInfo[IntImm])
2981 MatchInfo[IntImm] = TmpReg;
2982 CurrInst = TmpInst;
2983 }
2984 // Variable index.
2985 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2986 return false;
2987 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2988 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
2989 if (!MatchInfo[I - 1].isValid())
2990 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
2991 }
2992 return true;
2993 }
2994 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
2995 // overwritten, bail out.
2996 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
2997 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
2998}
2999
3002 Register UndefReg;
3003 auto GetUndef = [&]() {
3004 if (UndefReg)
3005 return UndefReg;
3006 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3007 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3008 return UndefReg;
3009 };
3010 for (Register &Reg : MatchInfo) {
3011 if (!Reg)
3012 Reg = GetUndef();
3013 }
3014 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3015 MI.eraseFromParent();
3016}
3017
3019 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
3020 Register SubLHS, SubRHS;
3021 std::tie(SubLHS, SubRHS) = MatchInfo;
3022 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3023 MI.eraseFromParent();
3024}
3025
3026 bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
3027 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3028 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3029 //
3030 // Creates the new hand + logic instructions (but does not insert them).
3031 //
3032 // On success, MatchInfo is populated with the new instructions. These are
3033 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3034 unsigned LogicOpcode = MI.getOpcode();
3035 assert(LogicOpcode == TargetOpcode::G_AND ||
3036 LogicOpcode == TargetOpcode::G_OR ||
3037 LogicOpcode == TargetOpcode::G_XOR);
3038 MachineIRBuilder MIB(MI);
3039 Register Dst = MI.getOperand(0).getReg();
3040 Register LHSReg = MI.getOperand(1).getReg();
3041 Register RHSReg = MI.getOperand(2).getReg();
3042
3043 // Don't recompute anything.
3044 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3045 return false;
3046
3047 // Make sure we have (hand x, ...), (hand y, ...)
3048 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3049 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3050 if (!LeftHandInst || !RightHandInst)
3051 return false;
3052 unsigned HandOpcode = LeftHandInst->getOpcode();
3053 if (HandOpcode != RightHandInst->getOpcode())
3054 return false;
3055 if (!LeftHandInst->getOperand(1).isReg() ||
3056 !RightHandInst->getOperand(1).isReg())
3057 return false;
3058
3059 // Make sure the types match up, and if we're doing this post-legalization,
3060 // we end up with legal types.
3061 Register X = LeftHandInst->getOperand(1).getReg();
3062 Register Y = RightHandInst->getOperand(1).getReg();
3063 LLT XTy = MRI.getType(X);
3064 LLT YTy = MRI.getType(Y);
3065 if (!XTy.isValid() || XTy != YTy)
3066 return false;
3067
3068 // Optional extra source register.
3069 Register ExtraHandOpSrcReg;
3070 switch (HandOpcode) {
3071 default:
3072 return false;
3073 case TargetOpcode::G_ANYEXT:
3074 case TargetOpcode::G_SEXT:
3075 case TargetOpcode::G_ZEXT: {
3076 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3077 break;
3078 }
3079 case TargetOpcode::G_TRUNC: {
3080 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3081 const MachineFunction *MF = MI.getMF();
3082 const DataLayout &DL = MF->getDataLayout();
3083 LLVMContext &Ctx = MF->getFunction().getContext();
3084
3085 LLT DstTy = MRI.getType(Dst);
3086 const TargetLowering &TLI = getTargetLowering();
3087
3088 // Be extra careful sinking truncate. If it's free, there's no benefit in
3089 // widening a binop.
3090 if (TLI.isZExtFree(DstTy, XTy, DL, Ctx) &&
3091 TLI.isTruncateFree(XTy, DstTy, DL, Ctx))
3092 return false;
3093 break;
3094 }
3095 case TargetOpcode::G_AND:
3096 case TargetOpcode::G_ASHR:
3097 case TargetOpcode::G_LSHR:
3098 case TargetOpcode::G_SHL: {
3099 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3100 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3101 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3102 return false;
3103 ExtraHandOpSrcReg = ZOp.getReg();
3104 break;
3105 }
3106 }
3107
3108 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3109 return false;
3110
3111 // Record the steps to build the new instructions.
3112 //
3113 // Steps to build (logic x, y)
3114 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3115 OperandBuildSteps LogicBuildSteps = {
3116 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3117 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3118 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3119 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3120
3121 // Steps to build hand (logic x, y), ...z
3122 OperandBuildSteps HandBuildSteps = {
3123 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3124 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3125 if (ExtraHandOpSrcReg.isValid())
3126 HandBuildSteps.push_back(
3127 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3128 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3129
3130 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3131 return true;
3132}
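// Illustrative example (hypothetical types): for
// %a:_(s64) = G_ZEXT %x:_(s32)
// %b:_(s64) = G_ZEXT %y:_(s32)
// %res:_(s64) = G_AND %a, %b
// the recorded steps build %t:_(s32) = G_AND %x, %y and then
// %res:_(s64) = G_ZEXT %t, doing the logic op at the narrower width.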
3133
3136 assert(MatchInfo.InstrsToBuild.size() &&
3137 "Expected at least one instr to build?");
3138 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3139 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3140 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3141 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3142 for (auto &OperandFn : InstrToBuild.OperandFns)
3143 OperandFn(Instr);
3144 }
3145 MI.eraseFromParent();
3146}
3147
3148 bool CombinerHelper::matchAshrShlToSextInreg(
3149 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3150 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3151 int64_t ShlCst, AshrCst;
3152 Register Src;
3153 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3154 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3155 m_ICstOrSplat(AshrCst))))
3156 return false;
3157 if (ShlCst != AshrCst)
3158 return false;
3159 if (!isLegalOrBeforeLegalizer(
3160 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3161 return false;
3162 MatchInfo = std::make_tuple(Src, ShlCst);
3163 return true;
3164}
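// Illustrative example (hypothetical amounts): on s32, with both constants
// equal to 24, (ashr (shl x, 24), 24) becomes G_SEXT_INREG x, 8 in the apply
// below.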
3165
3167 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3168 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3169 Register Src;
3170 int64_t ShiftAmt;
3171 std::tie(Src, ShiftAmt) = MatchInfo;
3172 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3173 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3174 MI.eraseFromParent();
3175}
3176
3177/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3178 bool CombinerHelper::matchOverlappingAnd(
3179 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3180 assert(MI.getOpcode() == TargetOpcode::G_AND);
3181
3182 Register Dst = MI.getOperand(0).getReg();
3183 LLT Ty = MRI.getType(Dst);
3184
3185 Register R;
3186 int64_t C1;
3187 int64_t C2;
3188 if (!mi_match(
3189 Dst, MRI,
3190 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3191 return false;
3192
3193 MatchInfo = [=](MachineIRBuilder &B) {
3194 if (C1 & C2) {
3195 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3196 return;
3197 }
3198 auto Zero = B.buildConstant(Ty, 0);
3199 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3200 };
3201 return true;
3202}
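// Illustrative example (hypothetical masks): and(and(x, 0xFF00), 0x0FF0)
// becomes and(x, 0x0F00), while and(and(x, 0xFF00), 0x00FF) folds to 0
// because the masks do not overlap.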
3203
3204 bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3205 Register &Replacement) {
3206 // Given
3207 //
3208 // %y:_(sN) = G_SOMETHING
3209 // %x:_(sN) = G_SOMETHING
3210 // %res:_(sN) = G_AND %x, %y
3211 //
3212 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3213 //
3214 // Patterns like this can appear as a result of legalization. E.g.
3215 //
3216 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3217 // %one:_(s32) = G_CONSTANT i32 1
3218 // %and:_(s32) = G_AND %cmp, %one
3219 //
3220 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3221 assert(MI.getOpcode() == TargetOpcode::G_AND);
3222 if (!KB)
3223 return false;
3224
3225 Register AndDst = MI.getOperand(0).getReg();
3226 Register LHS = MI.getOperand(1).getReg();
3227 Register RHS = MI.getOperand(2).getReg();
3228
3229 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3230 // we can't do anything. If we do, then it depends on whether we have
3231 // KnownBits on the LHS.
3232 KnownBits RHSBits = KB->getKnownBits(RHS);
3233 if (RHSBits.isUnknown())
3234 return false;
3235
3236 KnownBits LHSBits = KB->getKnownBits(LHS);
3237
3238 // Check that x & Mask == x.
3239 // x & 1 == x, always
3240 // x & 0 == x, only if x is also 0
3241 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3242 //
3243 // Check if we can replace AndDst with the LHS of the G_AND
3244 if (canReplaceReg(AndDst, LHS, MRI) &&
3245 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3246 Replacement = LHS;
3247 return true;
3248 }
3249
3250 // Check if we can replace AndDst with the RHS of the G_AND
3251 if (canReplaceReg(AndDst, RHS, MRI) &&
3252 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3253 Replacement = RHS;
3254 return true;
3255 }
3256
3257 return false;
3258}
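// Illustrative example (hypothetical known bits): if the RHS is the constant
// 0x0F and every bit of the LHS above bit 3 is known to be zero, then
// LHS & 0x0F == LHS and the G_AND is replaced by the LHS.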
3259
3261 // Given
3262 //
3263 // %y:_(sN) = G_SOMETHING
3264 // %x:_(sN) = G_SOMETHING
3265 // %res:_(sN) = G_OR %x, %y
3266 //
3267 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3268 assert(MI.getOpcode() == TargetOpcode::G_OR);
3269 if (!KB)
3270 return false;
3271
3272 Register OrDst = MI.getOperand(0).getReg();
3273 Register LHS = MI.getOperand(1).getReg();
3274 Register RHS = MI.getOperand(2).getReg();
3275
3276 KnownBits LHSBits = KB->getKnownBits(LHS);
3277 KnownBits RHSBits = KB->getKnownBits(RHS);
3278
3279 // Check that x | Mask == x.
3280 // x | 0 == x, always
3281 // x | 1 == x, only if x is also 1
3282 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3283 //
3284 // Check if we can replace OrDst with the LHS of the G_OR
3285 if (canReplaceReg(OrDst, LHS, MRI) &&
3286 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3287 Replacement = LHS;
3288 return true;
3289 }
3290
3291 // Check if we can replace OrDst with the RHS of the G_OR
3292 if (canReplaceReg(OrDst, RHS, MRI) &&
3293 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3294 Replacement = RHS;
3295 return true;
3296 }
3297
3298 return false;
3299}
3300
3301 bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3302 // If the input is already sign extended, just drop the extension.
3303 Register Src = MI.getOperand(1).getReg();
3304 unsigned ExtBits = MI.getOperand(2).getImm();
3305 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3306 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3307}
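// Illustrative example (hypothetical widths): for G_SEXT_INREG %x, 16 on
// s32, the extension is redundant when computeNumSignBits(%x) >= 17, i.e.
// the top 16 bits already replicate bit 15.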
3308
3309static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3310 int64_t Cst, bool IsVector, bool IsFP) {
3311 // For i1, Cst will always be -1 regardless of boolean contents.
3312 return (ScalarSizeBits == 1 && Cst == -1) ||
3313 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3314}
3315
3316 bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3317 SmallVectorImpl<Register> &RegsToNegate) {
3318 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3319 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3320 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3321 Register XorSrc;
3322 Register CstReg;
3323 // We match xor(src, true) here.
3324 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3325 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3326 return false;
3327
3328 if (!MRI.hasOneNonDBGUse(XorSrc))
3329 return false;
3330
3331 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3332 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3333 // work list of tree nodes to visit.
3334 RegsToNegate.push_back(XorSrc);
3335 // Remember whether the comparisons are all integer or all floating point.
3336 bool IsInt = false;
3337 bool IsFP = false;
3338 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3339 Register Reg = RegsToNegate[I];
3340 if (!MRI.hasOneNonDBGUse(Reg))
3341 return false;
3342 MachineInstr *Def = MRI.getVRegDef(Reg);
3343 switch (Def->getOpcode()) {
3344 default:
3345 // Don't match if the tree contains anything other than ANDs, ORs and
3346 // comparisons.
3347 return false;
3348 case TargetOpcode::G_ICMP:
3349 if (IsFP)
3350 return false;
3351 IsInt = true;
3352 // When we apply the combine we will invert the predicate.
3353 break;
3354 case TargetOpcode::G_FCMP:
3355 if (IsInt)
3356 return false;
3357 IsFP = true;
3358 // When we apply the combine we will invert the predicate.
3359 break;
3360 case TargetOpcode::G_AND:
3361 case TargetOpcode::G_OR:
3362 // Implement De Morgan's laws:
3363 // ~(x & y) -> ~x | ~y
3364 // ~(x | y) -> ~x & ~y
3365 // When we apply the combine we will change the opcode and recursively
3366 // negate the operands.
3367 RegsToNegate.push_back(Def->getOperand(1).getReg());
3368 RegsToNegate.push_back(Def->getOperand(2).getReg());
3369 break;
3370 }
3371 }
3372
3373 // Now we know whether the comparisons are integer or floating point, check
3374 // the constant in the xor.
3375 int64_t Cst;
3376 if (Ty.isVector()) {
3377 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3378 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3379 if (!MaybeCst)
3380 return false;
3381 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3382 return false;
3383 } else {
3384 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3385 return false;
3386 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3387 return false;
3388 }
3389
3390 return true;
3391}
3392
3393 void CombinerHelper::applyNotCmp(MachineInstr &MI,
3394 SmallVectorImpl<Register> &RegsToNegate) {
3395 for (Register Reg : RegsToNegate) {
3396 MachineInstr *Def = MRI.getVRegDef(Reg);
3397 Observer.changingInstr(*Def);
3398 // For each comparison, invert the opcode. For each AND and OR, change the
3399 // opcode.
3400 switch (Def->getOpcode()) {
3401 default:
3402 llvm_unreachable("Unexpected opcode");
3403 case TargetOpcode::G_ICMP:
3404 case TargetOpcode::G_FCMP: {
3405 MachineOperand &PredOp = Def->getOperand(1);
3406 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3407 (CmpInst::Predicate)PredOp.getPredicate());
3408 PredOp.setPredicate(NewP);
3409 break;
3410 }
3411 case TargetOpcode::G_AND:
3412 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3413 break;
3414 case TargetOpcode::G_OR:
3415 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3416 break;
3417 }
3418 Observer.changedInstr(*Def);
3419 }
3420
3421 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3422 MI.eraseFromParent();
3423}
3424
3426 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3427 // Match (xor (and x, y), y) (or any of its commuted cases)
3428 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3429 Register &X = MatchInfo.first;
3430 Register &Y = MatchInfo.second;
3431 Register AndReg = MI.getOperand(1).getReg();
3432 Register SharedReg = MI.getOperand(2).getReg();
3433
3434 // Find a G_AND on either side of the G_XOR.
3435 // Look for one of
3436 //
3437 // (xor (and x, y), SharedReg)
3438 // (xor SharedReg, (and x, y))
3439 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3440 std::swap(AndReg, SharedReg);
3441 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3442 return false;
3443 }
3444
3445 // Only do this if we'll eliminate the G_AND.
3446 if (!MRI.hasOneNonDBGUse(AndReg))
3447 return false;
3448
3449 // We can combine if SharedReg is the same as either the LHS or RHS of the
3450 // G_AND.
3451 if (Y != SharedReg)
3452 std::swap(X, Y);
3453 return Y == SharedReg;
3454}
3455
3457 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3458 // Fold (xor (and x, y), y) -> (and (not x), y)
3459 Register X, Y;
3460 std::tie(X, Y) = MatchInfo;
3461 auto Not = Builder.buildNot(MRI.getType(X), X);
3462 Observer.changingInstr(MI);
3463 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3464 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3465 MI.getOperand(2).setReg(Y);
3466 Observer.changedInstr(MI);
3467}
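// Illustrative note (sketch): the identity behind this pair of combines is
//   (x & y) ^ y == ~x & y
// For a bit where y == 1 the result is ~x, and where y == 0 it is 0, so the
// G_XOR of the G_AND with the shared register is an AND with NOT.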
3468
3469bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
3470 auto &PtrAdd = cast<GPtrAdd>(MI);
3471 Register DstReg = PtrAdd.getReg(0);
3472 LLT Ty = MRI.getType(DstReg);
3473 const DataLayout &DL = Builder.getMF().getDataLayout();
3474
3475 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3476 return false;
3477
3478 if (Ty.isPointer()) {
3479 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3480 return ConstVal && *ConstVal == 0;
3481 }
3482
3483 assert(Ty.isVector() && "Expecting a vector type");
3484 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3485 return isBuildVectorAllZeros(*VecMI, MRI);
3486}
3487
3488void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
3489 auto &PtrAdd = cast<GPtrAdd>(MI);
3490 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3491 PtrAdd.eraseFromParent();
3492}
3493
3494/// The second source operand is known to be a power of 2.
3495void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
3496 Register DstReg = MI.getOperand(0).getReg();
3497 Register Src0 = MI.getOperand(1).getReg();
3498 Register Pow2Src1 = MI.getOperand(2).getReg();
3499 LLT Ty = MRI.getType(DstReg);
3500
3501 // Fold (urem x, pow2) -> (and x, pow2-1)
3502 auto NegOne = Builder.buildConstant(Ty, -1);
3503 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3504 Builder.buildAnd(DstReg, Src0, Add);
3505 MI.eraseFromParent();
3506}
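// Worked example (sketch): for x urem 8 the power-of-two divisor becomes the
// mask 8 - 1 == 0b111, so the G_UREM is replaced by G_AND x, 7, which holds
// for any unsigned x.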
3507
3508bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
3509 unsigned &SelectOpNo) {
3510 Register LHS = MI.getOperand(1).getReg();
3511 Register RHS = MI.getOperand(2).getReg();
3512
3513 Register OtherOperandReg = RHS;
3514 SelectOpNo = 1;
3515 MachineInstr *Select = MRI.getVRegDef(LHS);
3516
3517 // Don't do this unless the old select is going away. We want to eliminate the
3518 // binary operator, not replace a binop with a select.
3519 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3520 !MRI.hasOneNonDBGUse(LHS)) {
3521 OtherOperandReg = LHS;
3522 SelectOpNo = 2;
3523 Select = MRI.getVRegDef(RHS);
3524 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3525 !MRI.hasOneNonDBGUse(RHS))
3526 return false;
3527 }
3528
3529 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3530 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3531
3532 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3533 /*AllowFP*/ true,
3534 /*AllowOpaqueConstants*/ false))
3535 return false;
3536 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3537 /*AllowFP*/ true,
3538 /*AllowOpaqueConstants*/ false))
3539 return false;
3540
3541 unsigned BinOpcode = MI.getOpcode();
3542
3543 // We know that one of the operands is a select of constants. Now verify that
3544 // the other binary operator operand is either a constant, or we can handle a
3545 // variable.
3546 bool CanFoldNonConst =
3547 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3548 (isNullOrNullSplat(*SelectLHS, MRI) ||
3549 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3550 (isNullOrNullSplat(*SelectRHS, MRI) ||
3551 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3552 if (CanFoldNonConst)
3553 return true;
3554
3555 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3556 /*AllowFP*/ true,
3557 /*AllowOpaqueConstants*/ false);
3558}
3559
3560/// \p SelectOperand is the operand in binary operator \p MI that is the select
3561/// to fold.
3562void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
3563 const unsigned &SelectOperand) {
3564 Register Dst = MI.getOperand(0).getReg();
3565 Register LHS = MI.getOperand(1).getReg();
3566 Register RHS = MI.getOperand(2).getReg();
3567 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3568
3569 Register SelectCond = Select->getOperand(1).getReg();
3570 Register SelectTrue = Select->getOperand(2).getReg();
3571 Register SelectFalse = Select->getOperand(3).getReg();
3572
3573 LLT Ty = MRI.getType(Dst);
3574 unsigned BinOpcode = MI.getOpcode();
3575
3576 Register FoldTrue, FoldFalse;
3577
3578 // We have a select-of-constants followed by a binary operator with a
3579 // constant. Eliminate the binop by pulling the constant math into the select.
3580 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3581 if (SelectOperand == 1) {
3582 // TODO: SelectionDAG verifies this actually constant folds before
3583 // committing to the combine.
3584
3585 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3586 FoldFalse =
3587 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3588 } else {
3589 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3590 FoldFalse =
3591 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3592 }
3593
3594 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3595 MI.eraseFromParent();
3596}
3597
3598std::optional<SmallVector<Register, 8>>
3599CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3600 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3601 // We want to detect if Root is part of a tree which represents a bunch
3602 // of loads being merged into a larger load. We'll try to recognize patterns
3603 // like, for example:
3604 //
3605 // Reg Reg
3606 // \ /
3607 // OR_1 Reg
3608 // \ /
3609 // OR_2
3610 // \ Reg
3611 // .. /
3612 // Root
3613 //
3614 // Reg Reg Reg Reg
3615 // \ / \ /
3616 // OR_1 OR_2
3617 // \ /
3618 // \ /
3619 // ...
3620 // Root
3621 //
3622 // Each "Reg" may have been produced by a load + some arithmetic. This
3623 // function will save each of them.
3624 SmallVector<Register, 8> RegsToVisit;
3625 SmallVector<const MachineInstr *, 8> Ors = {Root};
3626
3627 // In the "worst" case, we're dealing with a load for each byte. So, there
3628 // are at most #bytes - 1 ORs.
3629 const unsigned MaxIter =
3630 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3631 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3632 if (Ors.empty())
3633 break;
3634 const MachineInstr *Curr = Ors.pop_back_val();
3635 Register OrLHS = Curr->getOperand(1).getReg();
3636 Register OrRHS = Curr->getOperand(2).getReg();
3637
3638 // In the combine, we want to eliminate the entire tree.
3639 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3640 return std::nullopt;
3641
3642 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3643 // something that may be a load + arithmetic.
3644 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3645 Ors.push_back(Or);
3646 else
3647 RegsToVisit.push_back(OrLHS);
3648 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3649 Ors.push_back(Or);
3650 else
3651 RegsToVisit.push_back(OrRHS);
3652 }
3653
3654 // We're going to try and merge each register into a wider power-of-2 type,
3655 // so we ought to have an even number of registers.
3656 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3657 return std::nullopt;
3658 return RegsToVisit;
3659}
3660
3661/// Helper function for findLoadOffsetsForLoadOrCombine.
3662///
3663/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3664/// and then moving that value into a specific byte offset.
3665///
3666/// e.g. x[i] << 24
3667///
3668/// \returns The load instruction and the byte offset it is moved into.
3669static std::optional<std::pair<GZExtLoad *, int64_t>>
3670matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3671 const MachineRegisterInfo &MRI) {
3672 assert(MRI.hasOneNonDBGUse(Reg) &&
3673 "Expected Reg to only have one non-debug use?");
3674 Register MaybeLoad;
3675 int64_t Shift;
3676 if (!mi_match(Reg, MRI,
3677 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3678 Shift = 0;
3679 MaybeLoad = Reg;
3680 }
3681
3682 if (Shift % MemSizeInBits != 0)
3683 return std::nullopt;
3684
3685 // TODO: Handle other types of loads.
3686 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3687 if (!Load)
3688 return std::nullopt;
3689
3690 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3691 return std::nullopt;
3692
3693 return std::make_pair(Load, Shift / MemSizeInBits);
3694}
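// Worked example (sketch): with MemSizeInBits == 8, a pattern such as
//   %b = G_ZEXTLOAD %ptr (load 1 byte)
//   %v = G_SHL %b, 16
// matches with Shift == 16, so the returned byte position is 16 / 8 == 2.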
3695
3696std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3697CombinerHelper::findLoadOffsetsForLoadOrCombine(
3698 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
3699 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3700
3701 // Each load found for the pattern. There should be one for each RegsToVisit.
3703
3704 // The lowest index used in any load. (The lowest "i" for each x[i].)
3705 int64_t LowestIdx = INT64_MAX;
3706
3707 // The load which uses the lowest index.
3708 GZExtLoad *LowestIdxLoad = nullptr;
3709
3710 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3711 SmallSet<int64_t, 8> SeenIdx;
3712
3713 // Ensure each load is in the same MBB.
3714 // TODO: Support multiple MachineBasicBlocks.
3715 MachineBasicBlock *MBB = nullptr;
3716 const MachineMemOperand *MMO = nullptr;
3717
3718 // Earliest instruction-order load in the pattern.
3719 GZExtLoad *EarliestLoad = nullptr;
3720
3721 // Latest instruction-order load in the pattern.
3722 GZExtLoad *LatestLoad = nullptr;
3723
3724 // Base pointer which every load should share.
3725 Register BasePtr;
3726
3727 // We want to find a load for each register. Each load should have some
3728 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3729 // track of the load which uses the lowest index. Later, we will check if we
3730 // can use its pointer in the final, combined load.
3731 for (auto Reg : RegsToVisit) {
3732 // Find the load, and find the byte position that it will end up at in the
3733 // (possibly shifted) value.
3734 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3735 if (!LoadAndPos)
3736 return std::nullopt;
3737 GZExtLoad *Load;
3738 int64_t DstPos;
3739 std::tie(Load, DstPos) = *LoadAndPos;
3740
3741 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3742 // it is difficult to check for stores/calls/etc between loads.
3743 MachineBasicBlock *LoadMBB = Load->getParent();
3744 if (!MBB)
3745 MBB = LoadMBB;
3746 if (LoadMBB != MBB)
3747 return std::nullopt;
3748
3749 // Make sure that the MachineMemOperands of every seen load are compatible.
3750 auto &LoadMMO = Load->getMMO();
3751 if (!MMO)
3752 MMO = &LoadMMO;
3753 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3754 return std::nullopt;
3755
3756 // Find out what the base pointer and index for the load is.
3757 Register LoadPtr;
3758 int64_t Idx;
3759 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3760 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3761 LoadPtr = Load->getOperand(1).getReg();
3762 Idx = 0;
3763 }
3764
3765 // Don't combine things like a[i], a[i] -> a bigger load.
3766 if (!SeenIdx.insert(Idx).second)
3767 return std::nullopt;
3768
3769 // Every load must share the same base pointer; don't combine things like:
3770 //
3771 // a[i], b[i + 1] -> a bigger load.
3772 if (!BasePtr.isValid())
3773 BasePtr = LoadPtr;
3774 if (BasePtr != LoadPtr)
3775 return std::nullopt;
3776
3777 if (Idx < LowestIdx) {
3778 LowestIdx = Idx;
3779 LowestIdxLoad = Load;
3780 }
3781
3782 // Keep track of the byte offset that this load ends up at. If we have seen
3783 // the byte offset, then stop here. We do not want to combine:
3784 //
3785 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3786 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3787 return std::nullopt;
3788 Loads.insert(Load);
3789
3790 // Keep track of the position of the earliest/latest loads in the pattern.
3791 // We will check that there are no load fold barriers between them later
3792 // on.
3793 //
3794 // FIXME: Is there a better way to check for load fold barriers?
3795 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3796 EarliestLoad = Load;
3797 if (!LatestLoad || dominates(*LatestLoad, *Load))
3798 LatestLoad = Load;
3799 }
3800
3801 // We found a load for each register. Let's check if each load satisfies the
3802 // pattern.
3803 assert(Loads.size() == RegsToVisit.size() &&
3804 "Expected to find a load for each register?");
3805 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3806 LatestLoad && "Expected at least two loads?");
3807
3808 // Check if there are any stores, calls, etc. between any of the loads. If
3809 // there are, then we can't safely perform the combine.
3810 //
3811 // MaxIter is chosen based off the (worst case) number of iterations it
3812 // typically takes to succeed in the LLVM test suite plus some padding.
3813 //
3814 // FIXME: Is there a better way to check for load fold barriers?
3815 const unsigned MaxIter = 20;
3816 unsigned Iter = 0;
3817 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3818 LatestLoad->getIterator())) {
3819 if (Loads.count(&MI))
3820 continue;
3821 if (MI.isLoadFoldBarrier())
3822 return std::nullopt;
3823 if (Iter++ == MaxIter)
3824 return std::nullopt;
3825 }
3826
3827 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3828}
3829
3831 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3832 assert(MI.getOpcode() == TargetOpcode::G_OR);
3833 MachineFunction &MF = *MI.getMF();
3834 // Assuming a little-endian target, transform:
3835 // s8 *a = ...
3836 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3837 // =>
3838 // s32 val = *((i32)a)
3839 //
3840 // s8 *a = ...
3841 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3842 // =>
3843 // s32 val = BSWAP(*((s32)a))
3844 Register Dst = MI.getOperand(0).getReg();
3845 LLT Ty = MRI.getType(Dst);
3846 if (Ty.isVector())
3847 return false;
3848
3849 // We need to combine at least two loads into this type. Since the smallest
3850 // possible load is into a byte, we need at least a 16-bit wide type.
3851 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3852 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3853 return false;
3854
3855 // Match a collection of non-OR instructions in the pattern.
3856 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3857 if (!RegsToVisit)
3858 return false;
3859
3860 // We have a collection of non-OR instructions. Figure out how wide each of
3861 // the small loads should be based off of the number of potential loads we
3862 // found.
3863 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3864 if (NarrowMemSizeInBits % 8 != 0)
3865 return false;
3866
3867 // Check if each register feeding into each OR is a load from the same
3868 // base pointer + some arithmetic.
3869 //
3870 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3871 //
3872 // Also verify that each of these ends up putting a[i] into the same memory
3873 // offset as a load into a wide type would.
3874 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
3875 GZExtLoad *LowestIdxLoad, *LatestLoad;
3876 int64_t LowestIdx;
3877 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3878 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3879 if (!MaybeLoadInfo)
3880 return false;
3881 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3882
3883 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3884 // we found before, check if this corresponds to a big or little endian byte
3885 // pattern. If it does, then we can represent it using a load + possibly a
3886 // BSWAP.
3887 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3888 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3889 if (!IsBigEndian)
3890 return false;
3891 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3892 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3893 return false;
3894
3895 // Make sure that the load from the lowest index produces offset 0 in the
3896 // final value.
3897 //
3898 // This ensures that we won't combine something like this:
3899 //
3900 // load x[i] -> byte 2
3901 // load x[i+1] -> byte 0 ---> wide_load x[i]
3902 // load x[i+2] -> byte 1
3903 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3904 const unsigned ZeroByteOffset =
3905 *IsBigEndian
3906 ? bigEndianByteAt(NumLoadsInTy, 0)
3907 : littleEndianByteAt(NumLoadsInTy, 0);
3908 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3909 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3910 ZeroOffsetIdx->second != LowestIdx)
3911 return false;
3912
3913 // We will reuse the pointer from the load which ends up at byte offset 0. It
3914 // may not use index 0.
3915 Register Ptr = LowestIdxLoad->getPointerReg();
3916 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3917 LegalityQuery::MemDesc MMDesc(MMO);
3918 MMDesc.MemoryTy = Ty;
3919 if (!isLegalOrBeforeLegalizer(
3920 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3921 return false;
3922 auto PtrInfo = MMO.getPointerInfo();
3923 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3924
3925 // Load must be allowed and fast on the target.
3926 LLVMContext &C = MF.getFunction().getContext();
3927 auto &DL = MF.getDataLayout();
3928 unsigned Fast = 0;
3929 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3930 !Fast)
3931 return false;
3932
3933 MatchInfo = [=](MachineIRBuilder &MIB) {
3934 MIB.setInstrAndDebugLoc(*LatestLoad);
3935 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3936 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3937 if (NeedsBSwap)
3938 MIB.buildBSwap(Dst, LoadDst);
3939 };
3940 return true;
3941}
3942
3943bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
3944 MachineInstr *&ExtMI) {
3945 auto &PHI = cast<GPhi>(MI);
3946 Register DstReg = PHI.getReg(0);
3947
3948 // TODO: Extending a vector may be expensive, don't do this until heuristics
3949 // are better.
3950 if (MRI.getType(DstReg).isVector())
3951 return false;
3952
3953 // Try to match a phi, whose only use is an extend.
3954 if (!MRI.hasOneNonDBGUse(DstReg))
3955 return false;
3956 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3957 switch (ExtMI->getOpcode()) {
3958 case TargetOpcode::G_ANYEXT:
3959 return true; // G_ANYEXT is usually free.
3960 case TargetOpcode::G_ZEXT:
3961 case TargetOpcode::G_SEXT:
3962 break;
3963 default:
3964 return false;
3965 }
3966
3967 // If the target is likely to fold this extend away, don't propagate.
3969 return false;
3970
3971 // We don't want to propagate the extends unless there's a good chance that
3972 // they'll be optimized in some way.
3973 // Collect the unique incoming values.
3975 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3976 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
3977 switch (DefMI->getOpcode()) {
3978 case TargetOpcode::G_LOAD:
3979 case TargetOpcode::G_TRUNC:
3980 case TargetOpcode::G_SEXT:
3981 case TargetOpcode::G_ZEXT:
3982 case TargetOpcode::G_ANYEXT:
3983 case TargetOpcode::G_CONSTANT:
3984 InSrcs.insert(DefMI);
3985 // Don't try to propagate if there are too many places to create new
3986 // extends, chances are it'll increase code size.
3987 if (InSrcs.size() > 2)
3988 return false;
3989 break;
3990 default:
3991 return false;
3992 }
3993 }
3994 return true;
3995}
3996
3997void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
3998 MachineInstr *&ExtMI) {
3999 auto &PHI = cast<GPhi>(MI);
4000 Register DstReg = ExtMI->getOperand(0).getReg();
4001 LLT ExtTy = MRI.getType(DstReg);
4002
4003 // Propagate the extension into each incoming reg's defining block.
4004 // Use a SetVector here because PHIs can have duplicate edges, and we want
4005 // deterministic iteration order.
4008 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4009 auto SrcReg = PHI.getIncomingValue(I);
4010 auto *SrcMI = MRI.getVRegDef(SrcReg);
4011 if (!SrcMIs.insert(SrcMI))
4012 continue;
4013
4014 // Build an extend after each src inst.
4015 auto *MBB = SrcMI->getParent();
4016 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4017 if (InsertPt != MBB->end() && InsertPt->isPHI())
4018 InsertPt = MBB->getFirstNonPHI();
4019
4020 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4021 Builder.setDebugLoc(MI.getDebugLoc());
4022 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4023 OldToNewSrcMap[SrcMI] = NewExt;
4024 }
4025
4026 // Create a new phi with the extended inputs.
4028 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4029 NewPhi.addDef(DstReg);
4030 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4031 if (!MO.isReg()) {
4032 NewPhi.addMBB(MO.getMBB());
4033 continue;
4034 }
4035 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4036 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4037 }
4038 Builder.insertInstr(NewPhi);
4039 ExtMI->eraseFromParent();
4040}
4041
4042bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
4043 Register &Reg) {
4044 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4045 // If we have a constant index, look for a G_BUILD_VECTOR source
4046 // and find the source register that the index maps to.
4047 Register SrcVec = MI.getOperand(1).getReg();
4048 LLT SrcTy = MRI.getType(SrcVec);
4049
4050 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4051 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4052 return false;
4053
4054 unsigned VecIdx = Cst->Value.getZExtValue();
4055
4056 // Check if we have a build_vector or build_vector_trunc with an optional
4057 // trunc in front.
4058 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4059 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4060 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4061 }
4062
4063 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4064 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4065 return false;
4066
4067 EVT Ty(getMVTForLLT(SrcTy));
4068 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4069 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4070 return false;
4071
4072 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4073 return true;
4074}
4075
4076void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
4077 Register &Reg) {
4078 // Check the type of the register, since it may have come from a
4079 // G_BUILD_VECTOR_TRUNC.
4080 LLT ScalarTy = MRI.getType(Reg);
4081 Register DstReg = MI.getOperand(0).getReg();
4082 LLT DstTy = MRI.getType(DstReg);
4083
4084 if (ScalarTy != DstTy) {
4085 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4086 Builder.buildTrunc(DstReg, Reg);
4087 MI.eraseFromParent();
4088 return;
4089 }
4090 replaceSingleDefInstWithReg(MI, Reg);
4091}
4092
4093bool CombinerHelper::matchExtractAllEltsFromBuildVector(
4094 MachineInstr &MI,
4095 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4096 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4097 // This combine tries to find build_vector's which have every source element
4098 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4099 // masked load scalarization are run late in the pipeline. There's already
4100 // a combine for a similar pattern starting from the extract, but that
4101 // doesn't attempt to do it if there are multiple uses of the build_vector,
4102 // which in this case is true. Starting the combine from the build_vector
4103 // feels more natural than trying to find sibling nodes of extracts.
4104 // E.g.
4105 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4106 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4107 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4108 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4109 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4110 // ==>
4111 // replace ext{1,2,3,4} with %s{1,2,3,4}
4112
4113 Register DstReg = MI.getOperand(0).getReg();
4114 LLT DstTy = MRI.getType(DstReg);
4115 unsigned NumElts = DstTy.getNumElements();
4116
4117 SmallBitVector ExtractedElts(NumElts);
4118 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4119 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4120 return false;
4121 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4122 if (!Cst)
4123 return false;
4124 unsigned Idx = Cst->getZExtValue();
4125 if (Idx >= NumElts)
4126 return false; // Out of range.
4127 ExtractedElts.set(Idx);
4128 SrcDstPairs.emplace_back(
4129 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4130 }
4131 // Match if every element was extracted.
4132 return ExtractedElts.all();
4133}
4134
4135void CombinerHelper::applyExtractAllEltsFromBuildVector(
4136 MachineInstr &MI,
4137 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4138 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4139 for (auto &Pair : SrcDstPairs) {
4140 auto *ExtMI = Pair.second;
4141 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4142 ExtMI->eraseFromParent();
4143 }
4144 MI.eraseFromParent();
4145}
4146
4147void CombinerHelper::applyBuildFn(
4148 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4149 applyBuildFnNoErase(MI, MatchInfo);
4150 MI.eraseFromParent();
4151}
4152
4153void CombinerHelper::applyBuildFnNoErase(
4154 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4155 MatchInfo(Builder);
4156}
4157
4158bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
4159 BuildFnTy &MatchInfo) {
4160 assert(MI.getOpcode() == TargetOpcode::G_OR);
4161
4162 Register Dst = MI.getOperand(0).getReg();
4163 LLT Ty = MRI.getType(Dst);
4164 unsigned BitWidth = Ty.getScalarSizeInBits();
4165
4166 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4167 unsigned FshOpc = 0;
4168
4169 // Match (or (shl ...), (lshr ...)).
4170 if (!mi_match(Dst, MRI,
4171 // m_GOr() handles the commuted version as well.
4172 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4173 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4174 return false;
4175
4176 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4177 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4178 int64_t CstShlAmt, CstLShrAmt;
4179 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4180 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4181 CstShlAmt + CstLShrAmt == BitWidth) {
4182 FshOpc = TargetOpcode::G_FSHR;
4183 Amt = LShrAmt;
4184
4185 } else if (mi_match(LShrAmt, MRI,
4186 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4187 ShlAmt == Amt) {
4188 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4189 FshOpc = TargetOpcode::G_FSHL;
4190
4191 } else if (mi_match(ShlAmt, MRI,
4192 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4193 LShrAmt == Amt) {
4194 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4195 FshOpc = TargetOpcode::G_FSHR;
4196
4197 } else {
4198 return false;
4199 }
4200
4201 LLT AmtTy = MRI.getType(Amt);
4202 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4203 return false;
4204
4205 MatchInfo = [=](MachineIRBuilder &B) {
4206 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4207 };
4208 return true;
4209}
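// Worked example (sketch) of the constant case above, on s32 values:
//   %o = G_OR (G_SHL %x, 8), (G_LSHR %y, 24)
// has C0 + C1 == 8 + 24 == 32 == bit-width, so it can be rewritten as
//   %o = G_FSHR %x, %y, 24
// i.e. (%x << 8) | (%y >> 24).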
4210
4211/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4212bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
4213 unsigned Opc = MI.getOpcode();
4214 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4215 Register X = MI.getOperand(1).getReg();
4216 Register Y = MI.getOperand(2).getReg();
4217 if (X != Y)
4218 return false;
4219 unsigned RotateOpc =
4220 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4221 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4222}
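// Illustrative note (sketch): when both data operands are the same register,
// a funnel shift degenerates into a rotate, e.g.
//   G_FSHL %x, %x, %amt  ==  G_ROTL %x, %amt
// since the bits shifted in on one side are the bits shifted out on the other.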
4223
4224void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
4225 unsigned Opc = MI.getOpcode();
4226 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4227 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4228 Observer.changingInstr(MI);
4229 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4230 : TargetOpcode::G_ROTR));
4231 MI.removeOperand(2);
4232 Observer.changedInstr(MI);
4233}
4234
4235// Fold (rot x, c) -> (rot x, c % BitSize)
4236bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
4237 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4238 MI.getOpcode() == TargetOpcode::G_ROTR);
4239 unsigned Bitsize =
4240 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4241 Register AmtReg = MI.getOperand(2).getReg();
4242 bool OutOfRange = false;
4243 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4244 if (auto *CI = dyn_cast<ConstantInt>(C))
4245 OutOfRange |= CI->getValue().uge(Bitsize);
4246 return true;
4247 };
4248 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4249}
4250
4251void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
4252 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4253 MI.getOpcode() == TargetOpcode::G_ROTR);
4254 unsigned Bitsize =
4255 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4256 Register Amt = MI.getOperand(2).getReg();
4257 LLT AmtTy = MRI.getType(Amt);
4258 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4259 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4260 Observer.changingInstr(MI);
4261 MI.getOperand(2).setReg(Amt);
4262 Observer.changedInstr(MI);
4263}
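// Worked example (sketch): rotate amounts are taken modulo the bit width, so
// on s32 G_ROTL %x, 37 is equivalent to G_ROTL %x, 5; the combine above
// rewrites the amount as (37 urem 32).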
4264
4265bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
4266 int64_t &MatchInfo) {
4267 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4268 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4269
4270 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4271 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4272 // KnownBits on the LHS in two cases:
4273 //
4274 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4275 // we cannot do any transforms so we can safely bail out early.
4276 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4277 // >=0.
4278 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4279 if (KnownRHS.isUnknown())
4280 return false;
4281
4282 std::optional<bool> KnownVal;
4283 if (KnownRHS.isZero()) {
4284 // ? uge 0 -> always true
4285 // ? ult 0 -> always false
4286 if (Pred == CmpInst::ICMP_UGE)
4287 KnownVal = true;
4288 else if (Pred == CmpInst::ICMP_ULT)
4289 KnownVal = false;
4290 }
4291
4292 if (!KnownVal) {
4293 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4294 switch (Pred) {
4295 default:
4296 llvm_unreachable("Unexpected G_ICMP predicate?");
4297 case CmpInst::ICMP_EQ:
4298 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4299 break;
4300 case CmpInst::ICMP_NE:
4301 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4302 break;
4303 case CmpInst::ICMP_SGE:
4304 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4305 break;
4306 case CmpInst::ICMP_SGT:
4307 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4308 break;
4309 case CmpInst::ICMP_SLE:
4310 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4311 break;
4312 case CmpInst::ICMP_SLT:
4313 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4314 break;
4315 case CmpInst::ICMP_UGE:
4316 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4317 break;
4318 case CmpInst::ICMP_UGT:
4319 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4320 break;
4321 case CmpInst::ICMP_ULE:
4322 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4323 break;
4324 case CmpInst::ICMP_ULT:
4325 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4326 break;
4327 }
4328 }
4329
4330 if (!KnownVal)
4331 return false;
4332 MatchInfo =
4333 *KnownVal
4334 ? getICmpTrueVal(getTargetLowering(),
4335 /*IsVector = */
4336 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4337 /* IsFP = */ false)
4338 : 0;
4339 return true;
4340}
4341
4342bool CombinerHelper::matchICmpToLHSKnownBits(
4343 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4344 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4345 // Given:
4346 //
4347 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4348 // %cmp = G_ICMP ne %x, 0
4349 //
4350 // Or:
4351 //
4352 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4353 // %cmp = G_ICMP eq %x, 1
4354 //
4355 // We can replace %cmp with %x assuming true is 1 on the target.
4356 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4357 if (!CmpInst::isEquality(Pred))
4358 return false;
4359 Register Dst = MI.getOperand(0).getReg();
4360 LLT DstTy = MRI.getType(Dst);
4361 if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
4362 /* IsFP = */ false) != 1)
4363 return false;
4364 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4365 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4366 return false;
4367 Register LHS = MI.getOperand(2).getReg();
4368 auto KnownLHS = KB->getKnownBits(LHS);
4369 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4370 return false;
4371 // Make sure replacing Dst with the LHS is a legal operation.
4372 LLT LHSTy = MRI.getType(LHS);
4373 unsigned LHSSize = LHSTy.getSizeInBits();
4374 unsigned DstSize = DstTy.getSizeInBits();
4375 unsigned Op = TargetOpcode::COPY;
4376 if (DstSize != LHSSize)
4377 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4378 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4379 return false;
4380 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4381 return true;
4382}
4383
4384// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4385bool CombinerHelper::matchAndOrDisjointMask(
4386 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4387 assert(MI.getOpcode() == TargetOpcode::G_AND);
4388
4389 // Ignore vector types to simplify matching the two constants.
4390 // TODO: do this for vectors and scalars via a demanded bits analysis.
4391 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4392 if (Ty.isVector())
4393 return false;
4394
4395 Register Src;
4396 Register AndMaskReg;
4397 int64_t AndMaskBits;
4398 int64_t OrMaskBits;
4399 if (!mi_match(MI, MRI,
4400 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4401 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4402 return false;
4403
4404 // Check if OrMask could turn on any bits in Src.
4405 if (AndMaskBits & OrMaskBits)
4406 return false;
4407
4408 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4409 Observer.changingInstr(MI);
4410 // Canonicalize the result to have the constant on the RHS.
4411 if (MI.getOperand(1).getReg() == AndMaskReg)
4412 MI.getOperand(2).setReg(AndMaskReg);
4413 MI.getOperand(1).setReg(Src);
4414 Observer.changedInstr(MI);
4415 };
4416 return true;
4417}
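// Worked example (sketch): with disjoint masks,
//   %o = G_OR %x, 0xF0
//   %a = G_AND %o, 0x0F
// the OR cannot set any bit that survives the AND (0xF0 & 0x0F == 0), so the
// result is simply G_AND %x, 0x0F.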
4418
4419/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4420bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4421 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4422 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4423 Register Dst = MI.getOperand(0).getReg();
4424 Register Src = MI.getOperand(1).getReg();
4425 LLT Ty = MRI.getType(Src);
4426 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4427 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4428 return false;
4429 int64_t Width = MI.getOperand(2).getImm();
4430 Register ShiftSrc;
4431 int64_t ShiftImm;
4432 if (!mi_match(
4433 Src, MRI,
4434 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4435 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4436 return false;
4437 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4438 return false;
4439
4440 MatchInfo = [=](MachineIRBuilder &B) {
4441 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4442 auto Cst2 = B.buildConstant(ExtractTy, Width);
4443 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4444 };
4445 return true;
4446}
4447
4448/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4449bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI,
4450 BuildFnTy &MatchInfo) {
4451 GAnd *And = cast<GAnd>(&MI);
4452 Register Dst = And->getReg(0);
4453 LLT Ty = MRI.getType(Dst);
4454 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4455 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4456 // into account.
4457 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4458 return false;
4459
4460 int64_t AndImm, LSBImm;
4461 Register ShiftSrc;
4462 const unsigned Size = Ty.getScalarSizeInBits();
4463 if (!mi_match(And->getReg(0), MRI,
4464 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4465 m_ICst(AndImm))))
4466 return false;
4467
4468 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4469 auto MaybeMask = static_cast<uint64_t>(AndImm);
4470 if (MaybeMask & (MaybeMask + 1))
4471 return false;
4472
4473 // LSB must fit within the register.
4474 if (static_cast<uint64_t>(LSBImm) >= Size)
4475 return false;
4476
4477 uint64_t Width = APInt(Size, AndImm).countr_one();
4478 MatchInfo = [=](MachineIRBuilder &B) {
4479 auto WidthCst = B.buildConstant(ExtractTy, Width);
4480 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4481 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4482 };
4483 return true;
4484}
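// Worked example (sketch): on s32,
//   %s = G_LSHR %x, 3
//   %a = G_AND %s, 0x1F
// keeps the 5-bit field of %x starting at bit 3, so it becomes
//   %a = G_UBFX %x, 3, 5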
4485
4486bool CombinerHelper::matchBitfieldExtractFromShr(
4487 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4488 const unsigned Opcode = MI.getOpcode();
4489 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4490
4491 const Register Dst = MI.getOperand(0).getReg();
4492
4493 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4494 ? TargetOpcode::G_SBFX
4495 : TargetOpcode::G_UBFX;
4496
4497 // Check if the type we would use for the extract is legal
4498 LLT Ty = MRI.getType(Dst);
4499 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4500 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4501 return false;
4502
4503 Register ShlSrc;
4504 int64_t ShrAmt;
4505 int64_t ShlAmt;
4506 const unsigned Size = Ty.getScalarSizeInBits();
4507
4508 // Try to match shr (shl x, c1), c2
4509 if (!mi_match(Dst, MRI,
4510 m_BinOp(Opcode,
4511 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4512 m_ICst(ShrAmt))))
4513 return false;
4514
4515 // Make sure that the shift sizes can fit a bitfield extract
4516 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4517 return false;
4518
4519 // Skip this combine if the G_SEXT_INREG combine could handle it
4520 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4521 return false;
4522
4523 // Calculate start position and width of the extract
4524 const int64_t Pos = ShrAmt - ShlAmt;
4525 const int64_t Width = Size - ShrAmt;
4526
4527 MatchInfo = [=](MachineIRBuilder &B) {
4528 auto WidthCst = B.buildConstant(ExtractTy, Width);
4529 auto PosCst = B.buildConstant(ExtractTy, Pos);
4530 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4531 };
4532 return true;
4533}
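// Worked example (sketch): on s32,
//   %t = G_SHL %x, 24
//   %r = G_ASHR %t, 28
// sign-extends the 4-bit field of %x at bits [4, 8) (Pos = 28 - 24 = 4,
// Width = 32 - 28 = 4), so it becomes G_SBFX %x, 4, 4.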
4534
4535bool CombinerHelper::matchBitfieldExtractFromShrAnd(
4536 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4537 const unsigned Opcode = MI.getOpcode();
4538 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4539
4540 const Register Dst = MI.getOperand(0).getReg();
4541 LLT Ty = MRI.getType(Dst);
4542 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4543 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4544 return false;
4545
4546 // Try to match shr (and x, c1), c2
4547 Register AndSrc;
4548 int64_t ShrAmt;
4549 int64_t SMask;
4550 if (!mi_match(Dst, MRI,
4551 m_BinOp(Opcode,
4552 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4553 m_ICst(ShrAmt))))
4554 return false;
4555
4556 const unsigned Size = Ty.getScalarSizeInBits();
4557 if (ShrAmt < 0 || ShrAmt >= Size)
4558 return false;
4559
4560 // If the shift subsumes the mask, emit the 0 directly.
4561 if (0 == (SMask >> ShrAmt)) {
4562 MatchInfo = [=](MachineIRBuilder &B) {
4563 B.buildConstant(Dst, 0);
4564 };
4565 return true;
4566 }
4567
4568 // Check that ubfx can do the extraction, with no holes in the mask.
4569 uint64_t UMask = SMask;
4570 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4571 UMask &= maskTrailingOnes<uint64_t>(Size);
4572 if (!isMask_64(UMask))
4573 return false;
4574
4575 // Calculate start position and width of the extract.
4576 const int64_t Pos = ShrAmt;
4577 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4578
4579 // It's preferable to keep the shift, rather than form G_SBFX.
4580 // TODO: remove the G_AND via demanded bits analysis.
4581 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4582 return false;
4583
4584 MatchInfo = [=](MachineIRBuilder &B) {
4585 auto WidthCst = B.buildConstant(ExtractTy, Width);
4586 auto PosCst = B.buildConstant(ExtractTy, Pos);
4587 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4588 };
4589 return true;
4590}
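// Worked example (sketch): on s32,
//   %a = G_AND %x, 0x3F8
//   %r = G_LSHR %a, 3
// The bits the mask clears below the shift amount do not matter, the filled-in
// mask 0x3FF is contiguous, and the result is the 7-bit field of %x starting
// at bit 3, i.e. G_UBFX %x, 3, 7.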
4591
4592bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4593 MachineInstr &MI) {
4594 auto &PtrAdd = cast<GPtrAdd>(MI);
4595
4596 Register Src1Reg = PtrAdd.getBaseReg();
4597 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4598 if (!Src1Def)
4599 return false;
4600
4601 Register Src2Reg = PtrAdd.getOffsetReg();
4602
4603 if (MRI.hasOneNonDBGUse(Src1Reg))
4604 return false;
4605
4606 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4607 if (!C1)
4608 return false;
4609 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4610 if (!C2)
4611 return false;
4612
4613 const APInt &C1APIntVal = *C1;
4614 const APInt &C2APIntVal = *C2;
4615 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4616
4617 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4618 // This combine may end up running before ptrtoint/inttoptr combines
4619 // manage to eliminate redundant conversions, so try to look through them.
4620 MachineInstr *ConvUseMI = &UseMI;
4621 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4622 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4623 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4624 Register DefReg = ConvUseMI->getOperand(0).getReg();
4625 if (!MRI.hasOneNonDBGUse(DefReg))
4626 break;
4627 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4628 ConvUseOpc = ConvUseMI->getOpcode();
4629 }
4630 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4631 if (!LdStMI)
4632 continue;
4633 // Is x[offset2] already not a legal addressing mode? If so then
4634 // reassociating the constants breaks nothing (we test offset2 because
4635 // that's the one we hope to fold into the load or store).
4636 TargetLoweringBase::AddrMode AM;
4637 AM.HasBaseReg = true;
4638 AM.BaseOffs = C2APIntVal.getSExtValue();
4639 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4640 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4641 PtrAdd.getMF()->getFunction().getContext());
4642 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4643 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4644 AccessTy, AS))
4645 continue;
4646
4647 // Would x[offset1+offset2] still be a legal addressing mode?
4648 AM.BaseOffs = CombinedValue;
4649 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4650 AccessTy, AS))
4651 return true;
4652 }
4653
4654 return false;
4655}
4656
4657bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
4658 MachineInstr *RHS,
4659 BuildFnTy &MatchInfo) {
4660 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4661 Register Src1Reg = MI.getOperand(1).getReg();
4662 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4663 return false;
4664 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4665 if (!C2)
4666 return false;
4667
4668 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4669 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4670
4671 auto NewBase =
4672 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4673 Observer.changingInstr(MI);
4674 MI.getOperand(1).setReg(NewBase.getReg(0));
4675 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4676 Observer.changedInstr(MI);
4677 };
4678 return !reassociationCanBreakAddressingModePattern(MI);
4679}
4680
4681bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
4682 MachineInstr *LHS,
4683 MachineInstr *RHS,
4684 BuildFnTy &MatchInfo) {
4685 // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
4686 // if and only if (G_PTR_ADD X, C) has one use.
4687 Register LHSBase;
4688 std::optional<ValueAndVReg> LHSCstOff;
4689 if (!mi_match(MI.getBaseReg(), MRI,
4690 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4691 return false;
4692
4693 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4694 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4695 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4696 // before its def. Sink the instruction to just before the outer PTR_ADD to
4697 // ensure this doesn't happen.
4698 LHSPtrAdd->moveBefore(&MI);
4699 Register RHSReg = MI.getOffsetReg();
4700 // Setting the vreg directly can cause a type mismatch if it comes from an extend/trunc.
4701 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4702 Observer.changingInstr(MI);
4703 MI.getOperand(2).setReg(NewCst.getReg(0));
4704 Observer.changedInstr(MI);
4705 Observer.changingInstr(*LHSPtrAdd);
4706 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4707 Observer.changedInstr(*LHSPtrAdd);
4708 };
4709 return !reassociationCanBreakAddressingModePattern(MI);
4710}
4711
4713 MachineInstr *LHS,
4714 MachineInstr *RHS,
4715 BuildFnTy &MatchInfo) {
4716 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4717 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4718 if (!LHSPtrAdd)
4719 return false;
4720
4721 Register Src2Reg = MI.getOperand(2).getReg();
4722 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4723 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4724 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4725 if (!C1)
4726 return false;
4727 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4728 if (!C2)
4729 return false;
4730
4731 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4732 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4733 Observer.changingInstr(MI);
4734 MI.getOperand(1).setReg(LHSSrc1);
4735 MI.getOperand(2).setReg(NewCst.getReg(0));
4736 Observer.changedInstr(MI);
4737 };
4738 return !reassociationCanBreakAddressingModePattern(MI);
4739}
4740
4741bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
4742 BuildFnTy &MatchInfo) {
4743 auto &PtrAdd = cast<GPtrAdd>(MI);
4744 // We're trying to match a few pointer computation patterns here for
4745 // re-association opportunities.
4746 // 1) Isolating a constant operand to be on the RHS, e.g.:
4747 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4748 //
4749 // 2) Folding two constants in each sub-tree as long as such folding
4750 // doesn't break a legal addressing mode.
4751 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4752 //
4753 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4754 // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
4755 // iff (G_PTR_ADD X, C) has one use.
4756 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4757 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4758
4759 // Try to match example 2.
4760 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4761 return true;
4762
4763 // Try to match example 3.
4764 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4765 return true;
4766
4767 // Try to match example 1.
4768 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4769 return true;
4770
4771 return false;
4772}
4773bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
4774 Register OpLHS, Register OpRHS,
4775 BuildFnTy &MatchInfo) {
4776 LLT OpRHSTy = MRI.getType(OpRHS);
4777 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
4778
4779 if (OpLHSDef->getOpcode() != Opc)
4780 return false;
4781
4782 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
4783 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
4784 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
4785
4786 // If the inner op is (X op C), pull the constant out so it can be folded with
4787 // other constants in the expression tree. Folding is not guaranteed so we
4788 // might have (C1 op C2). In that case do not pull a constant out because it
4789 // won't help and can lead to infinite loops.
4790 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
4791 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
4792 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
4793 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
4794 MatchInfo = [=](MachineIRBuilder &B) {
4795 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
4796 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
4797 };
4798 return true;
4799 }
4800 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
4801 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
4802 // iff (op x, c1) has one use
4803 MatchInfo = [=](MachineIRBuilder &B) {
4804 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
4805 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
4806 };
4807 return true;
4808 }
4809 }
4810
4811 return false;
4812}
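// Worked example (sketch) for a commutative op such as G_ADD:
//   (G_ADD (G_ADD %x, 5), 7)
// is rewritten as (G_ADD %x, (G_ADD 5, 7)); the inner constant operation is
// then expected to fold to 12 by a separate constant-folding combine.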
4813
4814bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
4815 BuildFnTy &MatchInfo) {
4816 // We don't check if the reassociation will break a legal addressing mode
4817 // here since pointer arithmetic is handled by G_PTR_ADD.
4818 unsigned Opc = MI.getOpcode();
4819 Register DstReg = MI.getOperand(0).getReg();
4820 Register LHSReg = MI.getOperand(1).getReg();
4821 Register RHSReg = MI.getOperand(2).getReg();
4822
4823 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
4824 return true;
4825 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
4826 return true;
4827 return false;
4828}
4829
4830bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) {
4831 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4832 Register SrcOp = MI.getOperand(1).getReg();
4833
4834 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
4835 MatchInfo = *MaybeCst;
4836 return true;
4837 }
4838
4839 return false;
4840}
4841
4842bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) {
4843 Register Op1 = MI.getOperand(1).getReg();
4844 Register Op2 = MI.getOperand(2).getReg();
4845 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4846 if (!MaybeCst)
4847 return false;
4848 MatchInfo = *MaybeCst;
4849 return true;
4850}
4851
4852bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) {
4853 Register Op1 = MI.getOperand(1).getReg();
4854 Register Op2 = MI.getOperand(2).getReg();
4855 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
4856 if (!MaybeCst)
4857 return false;
4858 MatchInfo =
4859 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
4860 return true;
4861}
4862
4863bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
4864 ConstantFP *&MatchInfo) {
4865 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
4866 MI.getOpcode() == TargetOpcode::G_FMAD);
4867 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
4868
4869 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
4870 if (!Op3Cst)
4871 return false;
4872
4873 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
4874 if (!Op2Cst)
4875 return false;
4876
4877 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
4878 if (!Op1Cst)
4879 return false;
4880
4881 APFloat Op1F = Op1Cst->getValueAPF();
4882 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
4883 APFloat::rmNearestTiesToEven);
4884 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
4885 return true;
4886}
4887
4888bool CombinerHelper::matchNarrowBinopFeedingAnd(
4889 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4890 // Look for a binop feeding into an AND with a mask:
4891 //
4892 // %add = G_ADD %lhs, %rhs
4893 // %and = G_AND %add, 000...11111111
4894 //
4895 // Check if it's possible to perform the binop at a narrower width and zext
4896 // back to the original width like so:
4897 //
4898 // %narrow_lhs = G_TRUNC %lhs
4899 // %narrow_rhs = G_TRUNC %rhs
4900 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4901 // %new_add = G_ZEXT %narrow_add
4902 // %and = G_AND %new_add, 000...11111111
4903 //
4904 // This can allow later combines to eliminate the G_AND if it turns out
4905 // that the mask is irrelevant.
4906 assert(MI.getOpcode() == TargetOpcode::G_AND);
4907 Register Dst = MI.getOperand(0).getReg();
4908 Register AndLHS = MI.getOperand(1).getReg();
4909 Register AndRHS = MI.getOperand(2).getReg();
4910 LLT WideTy = MRI.getType(Dst);
4911
4912 // If the potential binop has more than one use, then it's possible that one
4913 // of those uses will need its full width.
4914 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
4915 return false;
4916
4917 // Check if the LHS feeding the AND is impacted by the high bits that we're
4918 // masking out.
4919 //
4920 // e.g. for 64-bit x, y:
4921 //
4922 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
4923 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
4924 if (!LHSInst)
4925 return false;
4926 unsigned LHSOpc = LHSInst->getOpcode();
4927 switch (LHSOpc) {
4928 default:
4929 return false;
4930 case TargetOpcode::G_ADD:
4931 case TargetOpcode::G_SUB:
4932 case TargetOpcode::G_MUL:
4933 case TargetOpcode::G_AND:
4934 case TargetOpcode::G_OR:
4935 case TargetOpcode::G_XOR:
4936 break;
4937 }
4938
4939 // Find the mask on the RHS.
4940 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
4941 if (!Cst)
4942 return false;
4943 auto Mask = Cst->Value;
4944 if (!Mask.isMask())
4945 return false;
4946
4947 // No point in combining if there's nothing to truncate.
4948 unsigned NarrowWidth = Mask.countr_one();
4949 if (NarrowWidth == WideTy.getSizeInBits())
4950 return false;
4951 LLT NarrowTy = LLT::scalar(NarrowWidth);
4952
4953 // Check if adding the zext + truncates could be harmful.
4954 auto &MF = *MI.getMF();
4955 const auto &TLI = getTargetLowering();
4956 LLVMContext &Ctx = MF.getFunction().getContext();
4957 auto &DL = MF.getDataLayout();
4958 if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
4959 !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
4960 return false;
4961 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
4962 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
4963 return false;
4964 Register BinOpLHS = LHSInst->getOperand(1).getReg();
4965 Register BinOpRHS = LHSInst->getOperand(2).getReg();
4966 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4967 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
4968 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
4969 auto NarrowBinOp =
4970 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
4971 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
4972 Observer.changingInstr(MI);
4973 MI.getOperand(1).setReg(Ext.getReg(0));
4974 Observer.changedInstr(MI);
4975 };
4976 return true;
4977}
4978
4980 unsigned Opc = MI.getOpcode();
4981 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
4982
4983 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
4984 return false;
4985
4986 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4987 Observer.changingInstr(MI);
4988 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
4989 : TargetOpcode::G_SADDO;
4990 MI.setDesc(Builder.getTII().get(NewOpc));
4991 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
4992 Observer.changedInstr(MI);
4993 };
4994 return true;
4995}
4996
4998 // (G_*MULO x, 0) -> 0 + no carry out
4999 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5000 MI.getOpcode() == TargetOpcode::G_SMULO);
5001 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5002 return false;
5003 Register Dst = MI.getOperand(0).getReg();
5004 Register Carry = MI.getOperand(1).getReg();
5005 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5006 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5007 return false;
5008 MatchInfo = [=](MachineIRBuilder &B) {
5009 B.buildConstant(Dst, 0);
5010 B.buildConstant(Carry, 0);
5011 };
5012 return true;
5013}
5014
5015bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
5016 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5017 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5018 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5019 MI.getOpcode() == TargetOpcode::G_SADDE ||
5020 MI.getOpcode() == TargetOpcode::G_USUBE ||
5021 MI.getOpcode() == TargetOpcode::G_SSUBE);
5022 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5023 return false;
5024 MatchInfo = [&](MachineIRBuilder &B) {
5025 unsigned NewOpcode;
5026 switch (MI.getOpcode()) {
5027 case TargetOpcode::G_UADDE:
5028 NewOpcode = TargetOpcode::G_UADDO;
5029 break;
5030 case TargetOpcode::G_SADDE:
5031 NewOpcode = TargetOpcode::G_SADDO;
5032 break;
5033 case TargetOpcode::G_USUBE:
5034 NewOpcode = TargetOpcode::G_USUBO;
5035 break;
5036 case TargetOpcode::G_SSUBE:
5037 NewOpcode = TargetOpcode::G_SSUBO;
5038 break;
5039 }
5039 }
5040 Observer.changingInstr(MI);
5041 MI.setDesc(B.getTII().get(NewOpcode));
5042 MI.removeOperand(4);
5043 Observer.changedInstr(MI);
5044 };
5045 return true;
5046}
5047
5048bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
5049 BuildFnTy &MatchInfo) {
5050 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5051 Register Dst = MI.getOperand(0).getReg();
5052 // (x + y) - z -> x (if y == z)
5053 // (x + y) - z -> y (if x == z)
5054 Register X, Y, Z;
5055 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5056 Register ReplaceReg;
5057 int64_t CstX, CstY;
5058 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5059 mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
5060 ReplaceReg = X;
5061 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5062 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5063 ReplaceReg = Y;
5064 if (ReplaceReg) {
5065 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5066 return true;
5067 }
5068 }
5069
5070 // x - (y + z) -> 0 - y (if x == z)
5071 // x - (y + z) -> 0 - z (if x == y)
5072 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5073 Register ReplaceReg;
5074 int64_t CstX;
5075 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5076 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5077 ReplaceReg = Y;
5078 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5079 mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
5080 ReplaceReg = Z;
5081 if (ReplaceReg) {
5082 MatchInfo = [=](MachineIRBuilder &B) {
5083 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5084 B.buildSub(Dst, Zero, ReplaceReg);
5085 };
5086 return true;
5087 }
5088 }
5089 return false;
5090}
5091
5092MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
5093 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5094 auto &UDiv = cast<GenericMachineInstr>(MI);
5095 Register Dst = UDiv.getReg(0);
5096 Register LHS = UDiv.getReg(1);
5097 Register RHS = UDiv.getReg(2);
5098 LLT Ty = MRI.getType(Dst);
5099 LLT ScalarTy = Ty.getScalarType();
5100 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5101 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5102 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5103
5104 auto &MIB = Builder;
5105
5106 bool UseSRL = false;
5107 SmallVector<Register, 16> Shifts, Factors;
5108 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5109 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5110
5111 auto BuildExactUDIVPattern = [&](const Constant *C) {
5112 // Don't recompute inverses for each splat element.
5113 if (IsSplat && !Factors.empty()) {
5114 Shifts.push_back(Shifts[0]);
5115 Factors.push_back(Factors[0]);
5116 return true;
5117 }
5118
5119 auto *CI = cast<ConstantInt>(C);
5120 APInt Divisor = CI->getValue();
5121 unsigned Shift = Divisor.countr_zero();
5122 if (Shift) {
5123 Divisor.lshrInPlace(Shift);
5124 UseSRL = true;
5125 }
5126
5127 // Calculate the multiplicative inverse modulo BW.
5128 APInt Factor = Divisor.multiplicativeInverse();
5129 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5130 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5131 return true;
5132 };
5133
5134 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5135 // Collect all magic values from the build vector.
5136 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5137 llvm_unreachable("Expected unary predicate match to succeed");
5138
5139 Register Shift, Factor;
5140 if (Ty.isVector()) {
5141 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5142 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5143 } else {
5144 Shift = Shifts[0];
5145 Factor = Factors[0];
5146 }
5147
5148 Register Res = LHS;
5149
5150 if (UseSRL)
5151 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5152
5153 return MIB.buildMul(Ty, Res, Factor);
5154 }
5155
5156 unsigned KnownLeadingZeros =
5157 KB ? KB->getKnownBits(LHS).countMinLeadingZeros() : 0;
5158
5159 bool UseNPQ = false;
5160 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5161 auto BuildUDIVPattern = [&](const Constant *C) {
5162 auto *CI = cast<ConstantInt>(C);
5163 const APInt &Divisor = CI->getValue();
5164
5165 bool SelNPQ = false;
5166 APInt Magic(Divisor.getBitWidth(), 0);
5167 unsigned PreShift = 0, PostShift = 0;
5168
5169 // Magic algorithm doesn't work for division by 1. We need to emit a select
5170 // at the end.
5171 // TODO: Use undef values for divisor of 1.
5172 if (!Divisor.isOne()) {
5173
5174 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5175 // in the dividend exceed the leading zeros for the divisor.
5176 UnsignedDivisionByConstantInfo magics =
5177 UnsignedDivisionByConstantInfo::get(
5178 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5179
5180 Magic = std::move(magics.Magic);
5181
5182 assert(magics.PreShift < Divisor.getBitWidth() &&
5183 "We shouldn't generate an undefined shift!");
5184 assert(magics.PostShift < Divisor.getBitWidth() &&
5185 "We shouldn't generate an undefined shift!");
5186 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5187 PreShift = magics.PreShift;
5188 PostShift = magics.PostShift;
5189 SelNPQ = magics.IsAdd;
5190 }
5191
5192 PreShifts.push_back(
5193 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5194 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5195 NPQFactors.push_back(
5196 MIB.buildConstant(ScalarTy,
5197 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5198 : APInt::getZero(EltBits))
5199 .getReg(0));
5200 PostShifts.push_back(
5201 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5202 UseNPQ |= SelNPQ;
5203 return true;
5204 };
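// E.g. for a 32-bit udiv by 5 this yields the well-known magic constant
// 0xCCCCCCCD with PostShift = 2 and no NPQ fix-up, so the division lowers
// to (G_UMULH x, 0xCCCCCCCD) >> 2.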
5205
5206 // Collect the shifts/magic values from each element.
5207 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5208 (void)Matched;
5209 assert(Matched && "Expected unary predicate match to succeed");
5210
5211 Register PreShift, PostShift, MagicFactor, NPQFactor;
5212 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5213 if (RHSDef) {
5214 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5215 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5216 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5217 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5218 } else {
5219 assert(MRI.getType(RHS).isScalar() &&
5220 "Non-build_vector operation should have been a scalar");
5221 PreShift = PreShifts[0];
5222 MagicFactor = MagicFactors[0];
5223 PostShift = PostShifts[0];
5224 }
5225
5226 Register Q = LHS;
5227 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5228
5229 // Multiply the numerator (operand 0) by the magic value.
5230 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5231
5232 if (UseNPQ) {
5233 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5234
5235 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5236 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5237 if (Ty.isVector())
5238 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5239 else
5240 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5241
5242 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5243 }
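// The NPQ path computes Q += (LHS - Q) >> 1, the standard fix-up for
// divisors whose magic constant needs EltBits + 1 bits.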
5244
5245 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5246 auto One = MIB.buildConstant(Ty, 1);
5247 auto IsOne = MIB.buildICmp(
5248 CmpInst::Predicate::ICMP_EQ,
5249 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5250 return MIB.buildSelect(Ty, IsOne, LHS, Q);
5251}
5252
5253 bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
5254 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5255 Register Dst = MI.getOperand(0).getReg();
5256 Register RHS = MI.getOperand(2).getReg();
5257 LLT DstTy = MRI.getType(Dst);
5258
5259 auto &MF = *MI.getMF();
5260 AttributeList Attr = MF.getFunction().getAttributes();
5261 const auto &TLI = getTargetLowering();
5262 LLVMContext &Ctx = MF.getFunction().getContext();
5263 auto &DL = MF.getDataLayout();
5264 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5265 return false;
5266
5267 // Don't do this for minsize because the instruction sequence is usually
5268 // larger.
5269 if (MF.getFunction().hasMinSize())
5270 return false;
5271
5272 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5273 return matchUnaryPredicate(
5274 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5275 }
5276
5277 auto *RHSDef = MRI.getVRegDef(RHS);
5278 if (!isConstantOrConstantVector(*RHSDef, MRI))
5279 return false;
5280
5281 // Don't do this if the types are not going to be legal.
5282 if (LI) {
5283 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5284 return false;
5285 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5286 return false;
5287 if (!isLegalOrBeforeLegalizer(
5288 {TargetOpcode::G_ICMP,
5289 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5290 DstTy}}))
5291 return false;
5292 }
5293
5294 return matchUnaryPredicate(
5295 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5296}
5297
5298 void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
5299 auto *NewMI = buildUDivUsingMul(MI);
5300 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5301}
5302
5303 bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
5304 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5305 Register Dst = MI.getOperand(0).getReg();
5306 Register RHS = MI.getOperand(2).getReg();
5307 LLT DstTy = MRI.getType(Dst);
5308
5309 auto &MF = *MI.getMF();
5310 AttributeList Attr = MF.getFunction().getAttributes();
5311 const auto &TLI = getTargetLowering();
5312 LLVMContext &Ctx = MF.getFunction().getContext();
5313 auto &DL = MF.getDataLayout();
5314 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5315 return false;
5316
5317 // Don't do this for minsize because the instruction sequence is usually
5318 // larger.
5319 if (MF.getFunction().hasMinSize())
5320 return false;
5321
5322 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5323 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5324 return matchUnaryPredicate(
5325 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5326 }
5327
5328 // Don't support the general case for now.
5329 return false;
5330}
5331
5332 void CombinerHelper::applySDivByConst(MachineInstr &MI) {
5333 auto *NewMI = buildSDivUsingMul(MI);
5334 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5335}
5336
5337 MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
5338 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5339 auto &SDiv = cast<GenericMachineInstr>(MI);
5340 Register Dst = SDiv.getReg(0);
5341 Register LHS = SDiv.getReg(1);
5342 Register RHS = SDiv.getReg(2);
5343 LLT Ty = MRI.getType(Dst);
5344 LLT ScalarTy = Ty.getScalarType();
5345 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5346 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5347 auto &MIB = Builder;
5348
5349 bool UseSRA = false;
5350 SmallVector<Register, 16> Shifts, Factors;
5351
5352 auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5353 bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
5354
5355 auto BuildSDIVPattern = [&](const Constant *C) {
5356 // Don't recompute inverses for each splat element.
5357 if (IsSplat && !Factors.empty()) {
5358 Shifts.push_back(Shifts[0]);
5359 Factors.push_back(Factors[0]);
5360 return true;
5361 }
5362
5363 auto *CI = cast<ConstantInt>(C);
5364 APInt Divisor = CI->getValue();
5365 unsigned Shift = Divisor.countr_zero();
5366 if (Shift) {
5367 Divisor.ashrInPlace(Shift);
5368 UseSRA = true;
5369 }
5370
5371 // Calculate the multiplicative inverse modulo BW.
5372 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5373 APInt Factor = Divisor.multiplicativeInverse();
5374 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5375 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5376 return true;
5377 };
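// E.g. an exact sdiv by 12 on s32 gives Shift = 2 and Factor = 0xAAAAAAAB
// (the multiplicative inverse of 3 modulo 2^32), so x / 12 becomes
// (x >>s 2) * 0xAAAAAAAB.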
5378
5379 // Collect all magic values from the build vector.
5380 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5381 (void)Matched;
5382 assert(Matched && "Expected unary predicate match to succeed");
5383
5384 Register Shift, Factor;
5385 if (Ty.isVector()) {
5386 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5387 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5388 } else {
5389 Shift = Shifts[0];
5390 Factor = Factors[0];
5391 }
5392
5393 Register Res = LHS;
5394
5395 if (UseSRA)
5396 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5397
5398 return MIB.buildMul(Ty, Res, Factor);
5399}
5400
5401 bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) {
5402 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5403 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5404 "Expected SDIV or UDIV");
5405 auto &Div = cast<GenericMachineInstr>(MI);
5406 Register RHS = Div.getReg(2);
5407 auto MatchPow2 = [&](const Constant *C) {
5408 auto *CI = dyn_cast<ConstantInt>(C);
5409 return CI && (CI->getValue().isPowerOf2() ||
5410 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5411 };
5412 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5413}
5414
5415 void CombinerHelper::applySDivByPow2(MachineInstr &MI) {
5416 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5417 auto &SDiv = cast<GenericMachineInstr>(MI);
5418 Register Dst = SDiv.getReg(0);
5419 Register LHS = SDiv.getReg(1);
5420 Register RHS = SDiv.getReg(2);
5421 LLT Ty = MRI.getType(Dst);
5422 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5423 LLT CCVT =
5424 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5425
5426 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5427 // to the following version:
5428 //
5429 // %c1 = G_CTTZ %rhs
5430 // %inexact = G_SUB $bitwidth, %c1
5431 // %sign = %G_ASHR %lhs, $(bitwidth - 1)
5432 // %lshr = G_LSHR %sign, %inexact
5433 // %add = G_ADD %lhs, %lshr
5434 // %ashr = G_ASHR %add, %c1
5435 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5436 // %zero = G_CONSTANT $0
5437 // %neg = G_NEG %ashr
5438 // %isneg = G_ICMP SLT %rhs, %zero
5439 // %res = G_SELECT %isneg, %neg, %ashr
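// E.g. for x sdiv 8 on s32: %c1 = 3, %inexact = 29, %sign = x >>s 31,
// %lshr = %sign >>u 29 (7 for negative x, else 0), and
// %ashr = (x + %lshr) >>s 3, which rounds toward zero as required.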
5440
5441 unsigned BitWidth = Ty.getScalarSizeInBits();
5442 auto Zero = Builder.buildConstant(Ty, 0);
5443
5444 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5445 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5446 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5447 // Splat the sign bit into the register
5448 auto Sign = Builder.buildAShr(
5449 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5450
5451 // Add (LHS < 0) ? abs(divisor) - 1 : 0 so the arithmetic shift rounds toward zero.
5452 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5453 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5454 auto AShr = Builder.buildAShr(Ty, Add, C1);
5455
5456 // Special case: (sdiv X, 1) -> X
5457 // Special Case: (sdiv X, -1) -> 0-X
5458 auto One = Builder.buildConstant(Ty, 1);
5459 auto MinusOne = Builder.buildConstant(Ty, -1);
5460 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5461 auto IsMinusOne =
5462 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5463 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5464 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5465
5466 // If divided by a positive value, we're done. Otherwise, the result must be
5467 // negated.
5468 auto Neg = Builder.buildNeg(Ty, AShr);
5469 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5470 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5471 MI.eraseFromParent();
5472}
5473
5474 void CombinerHelper::applyUDivByPow2(MachineInstr &MI) {
5475 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5476 auto &UDiv = cast<GenericMachineInstr>(MI);
5477 Register Dst = UDiv.getReg(0);
5478 Register LHS = UDiv.getReg(1);
5479 Register RHS = UDiv.getReg(2);
5480 LLT Ty = MRI.getType(Dst);
5481 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5482
5483 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5484 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5485 MI.eraseFromParent();
5486}
5487
5488 bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
5489 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5490 Register RHS = MI.getOperand(2).getReg();
5491 Register Dst = MI.getOperand(0).getReg();
5492 LLT Ty = MRI.getType(Dst);
5493 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5494 auto MatchPow2ExceptOne = [&](const Constant *C) {
5495 if (auto *CI = dyn_cast<ConstantInt>(C))
5496 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5497 return false;
5498 };
5499 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5500 return false;
5501 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
5502}
5503
5504 void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
5505 Register LHS = MI.getOperand(1).getReg();
5506 Register RHS = MI.getOperand(2).getReg();
5507 Register Dst = MI.getOperand(0).getReg();
5508 LLT Ty = MRI.getType(Dst);
5509 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5510 unsigned NumEltBits = Ty.getScalarSizeInBits();
5511
5512 auto LogBase2 = buildLogBase2(RHS, Builder);
5513 auto ShiftAmt =
5514 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
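// E.g. on s32 with RHS = 8: LogBase2 = 3 and ShiftAmt = 29, since the high
// half of x * 2^3 is simply x >> (32 - 3).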
5515 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5516 Builder.buildLShr(Dst, LHS, Trunc);
5517 MI.eraseFromParent();
5518}
5519
5520 bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
5521 BuildFnTy &MatchInfo) {
5522 unsigned Opc = MI.getOpcode();
5523 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
5524 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5525 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
5526
5527 Register Dst = MI.getOperand(0).getReg();
5528 Register X = MI.getOperand(1).getReg();
5529 Register Y = MI.getOperand(2).getReg();
5530 LLT Type = MRI.getType(Dst);
5531
5532 // fold (fadd x, fneg(y)) -> (fsub x, y)
5533 // fold (fadd fneg(y), x) -> (fsub x, y)
5534 // G_FADD is commutative so both cases are checked by m_GFAdd
5535 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5536 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
5537 Opc = TargetOpcode::G_FSUB;
5538 }
5539 /// fold (fsub x, fneg(y)) -> (fadd x, y)
5540 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5541 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
5542 Opc = TargetOpcode::G_FADD;
5543 }
5544 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
5545 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
5546 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
5547 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
5548 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5549 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
5550 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
5551 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
5552 // no opcode change
5553 } else
5554 return false;
5555
5556 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5557 Observer.changingInstr(MI);
5558 MI.setDesc(B.getTII().get(Opc));
5559 MI.getOperand(1).setReg(X);
5560 MI.getOperand(2).setReg(Y);
5561 Observer.changedInstr(MI);
5562 };
5563 return true;
5564}
5565
5566 bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5567 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5568
5569 Register LHS = MI.getOperand(1).getReg();
5570 MatchInfo = MI.getOperand(2).getReg();
5571 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
5572
5573 const auto LHSCst = Ty.isVector()
5574 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
5575 : getFConstantVRegValWithLookThrough(LHS, MRI);
5576 if (!LHSCst)
5577 return false;
5578
5579 // -0.0 is always allowed
5580 if (LHSCst->Value.isNegZero())
5581 return true;
5582
5583 // +0.0 is only allowed if nsz is set.
5584 if (LHSCst->Value.isPosZero())
5585 return MI.getFlag(MachineInstr::FmNsz);
5586
5587 return false;
5588}
5589
5590 void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5591 Register Dst = MI.getOperand(0).getReg();
5592 Builder.buildFNeg(
5593 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
5594 eraseInst(MI);
5595}
5596
5597/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
5598/// due to global flags or MachineInstr flags.
5599static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
5600 if (MI.getOpcode() != TargetOpcode::G_FMUL)
5601 return false;
5602 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
5603}
5604
5605static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
5606 const MachineRegisterInfo &MRI) {
5607 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
5608 MRI.use_instr_nodbg_end()) >
5609 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
5610 MRI.use_instr_nodbg_end());
5611}
5612
5613 bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
5614 bool &AllowFusionGlobally,
5615 bool &HasFMAD, bool &Aggressive,
5616 bool CanReassociate) {
5617
5618 auto *MF = MI.getMF();
5619 const auto &TLI = *MF->getSubtarget().getTargetLowering();
5620 const TargetOptions &Options = MF->getTarget().Options;
5621 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5622
5623 if (CanReassociate &&
5624 !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
5625 return false;
5626
5627 // Floating-point multiply-add with intermediate rounding.
5628 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
5629 // Floating-point multiply-add without intermediate rounding.
5630 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
5631 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
5632 // No valid opcode, do not combine.
5633 if (!HasFMAD && !HasFMA)
5634 return false;
5635
5636 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
5637 Options.UnsafeFPMath || HasFMAD;
5638 // If the addition is not contractable, do not combine.
5639 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
5640 return false;
5641
5642 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
5643 return true;
5644}
5645
5646 bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
5647 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5648 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5649
5650 bool AllowFusionGlobally, HasFMAD, Aggressive;
5651 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5652 return false;
5653
5654 Register Op1 = MI.getOperand(1).getReg();
5655 Register Op2 = MI.getOperand(2).getReg();
5656 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5657 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5658 unsigned PreferredFusedOpcode =
5659 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5660
5661 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5662 // prefer to fold the multiply with fewer uses.
5663 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5664 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5665 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5666 std::swap(LHS, RHS);
5667 }
5668
5669 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
5670 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5671 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
5672 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5673 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5674 {LHS.MI->getOperand(1).getReg(),
5675 LHS.MI->getOperand(2).getReg(), RHS.Reg});
5676 };
5677 return true;
5678 }
5679
5680 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
5681 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5682 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
5683 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5684 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5685 {RHS.MI->getOperand(1).getReg(),
5686 RHS.MI->getOperand(2).getReg(), LHS.Reg});
5687 };
5688 return true;
5689 }
5690
5691 return false;
5692}
5693
5694 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
5695 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5696 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5697
5698 bool AllowFusionGlobally, HasFMAD, Aggressive;
5699 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5700 return false;
5701
5702 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5703 Register Op1 = MI.getOperand(1).getReg();
5704 Register Op2 = MI.getOperand(2).getReg();
5705 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5706 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5707 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5708
5709 unsigned PreferredFusedOpcode =
5710 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5711
5712 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5713 // prefer to fold the multiply with fewer uses.
5714 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5715 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5716 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5717 std::swap(LHS, RHS);
5718 }
5719
5720 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
5721 MachineInstr *FpExtSrc;
5722 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5723 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5724 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5725 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5726 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5727 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5728 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5729 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5730 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
5731 };
5732 return true;
5733 }
5734
5735 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
5736 // Note: Commutes FADD operands.
5737 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5738 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5739 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5740 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5741 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5742 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5743 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5744 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5745 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
5746 };
5747 return true;
5748 }
5749
5750 return false;
5751}
5752
5753 bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
5754 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5755 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5756
5757 bool AllowFusionGlobally, HasFMAD, Aggressive;
5758 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
5759 return false;
5760
5761 Register Op1 = MI.getOperand(1).getReg();
5762 Register Op2 = MI.getOperand(2).getReg();
5763 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5764 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5765 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5766
5767 unsigned PreferredFusedOpcode =
5768 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5769
5770 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5771 // prefer to fold the multiply with fewer uses.
5772 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5773 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5774 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5775 std::swap(LHS, RHS);
5776 }
5777
5778 MachineInstr *FMA = nullptr;
5779 Register Z;
5780 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
5781 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5782 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
5783 TargetOpcode::G_FMUL) &&
5784 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
5785 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
5786 FMA = LHS.MI;
5787 Z = RHS.Reg;
5788 }
5789 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
5790 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5791 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
5792 TargetOpcode::G_FMUL) &&
5793 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
5794 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
5795 Z = LHS.Reg;
5796 FMA = RHS.MI;
5797 }
5798
5799 if (FMA) {
5800 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
5801 Register X = FMA->getOperand(1).getReg();
5802 Register Y = FMA->getOperand(2).getReg();
5803 Register U = FMulMI->getOperand(1).getReg();
5804 Register V = FMulMI->getOperand(2).getReg();
5805
5806 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5807 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
5808 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
5809 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5810 {X, Y, InnerFMA});
5811 };
5812 return true;
5813 }
5814
5815 return false;
5816}
5817
5818 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
5819 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5820 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5821
5822 bool AllowFusionGlobally, HasFMAD, Aggressive;
5823 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5824 return false;
5825
5826 if (!Aggressive)
5827 return false;
5828
5829 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5830 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5831 Register Op1 = MI.getOperand(1).getReg();
5832 Register Op2 = MI.getOperand(2).getReg();
5833 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5834 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5835
5836 unsigned PreferredFusedOpcode =
5837 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5838
5839 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5840 // prefer to fold the multiply with fewer uses.
5841 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5842 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5843 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5844 std::swap(LHS, RHS);
5845 }
5846
5847 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
5848 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
5849 Register Y, MachineIRBuilder &B) {
5850 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
5851 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
5852 Register InnerFMA =
5853 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
5854 .getReg(0);
5855 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5856 {X, Y, InnerFMA});
5857 };
5858
5859 MachineInstr *FMulMI, *FMAMI;
5860 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
5861 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5862 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5863 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
5864 m_GFPExt(m_MInstr(FMulMI))) &&
5865 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5866 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5867 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5868 MatchInfo = [=](MachineIRBuilder &B) {
5869 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5870 FMulMI->getOperand(2).getReg(), RHS.Reg,
5871 LHS.MI->getOperand(1).getReg(),
5872 LHS.MI->getOperand(2).getReg(), B);
5873 };
5874 return true;
5875 }
5876
5877 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
5878 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5879 // FIXME: This turns two single-precision and one double-precision
5880 // operation into two double-precision operations, which might not be
5881 // interesting for all targets, especially GPUs.
5882 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5883 FMAMI->getOpcode() == PreferredFusedOpcode) {
5884 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5885 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5886 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5887 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5888 MatchInfo = [=](MachineIRBuilder &B) {
5889 Register X = FMAMI->getOperand(1).getReg();
5890 Register Y = FMAMI->getOperand(2).getReg();
5891 X = B.buildFPExt(DstType, X).getReg(0);
5892 Y = B.buildFPExt(DstType, Y).getReg(0);
5893 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5894 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
5895 };
5896
5897 return true;
5898 }
5899 }
5900
5901 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
5902 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5903 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5904 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
5905 m_GFPExt(m_MInstr(FMulMI))) &&
5906 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5907 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5908 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5909 MatchInfo = [=](MachineIRBuilder &B) {
5910 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5911 FMulMI->getOperand(2).getReg(), LHS.Reg,
5912 RHS.MI->getOperand(1).getReg(),
5913 RHS.MI->getOperand(2).getReg(), B);
5914 };
5915 return true;
5916 }
5917
5918 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
5919 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5920 // FIXME: This turns two single-precision and one double-precision
5921 // operation into two double-precision operations, which might not be
5922 // interesting for all targets, especially GPUs.
5923 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5924 FMAMI->getOpcode() == PreferredFusedOpcode) {
5925 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5926 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5927 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5928 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5929 MatchInfo = [=](MachineIRBuilder &B) {
5930 Register X = FMAMI->getOperand(1).getReg();
5931 Register Y = FMAMI->getOperand(2).getReg();
5932 X = B.buildFPExt(DstType, X).getReg(0);
5933 Y = B.buildFPExt(DstType, Y).getReg(0);
5934 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5935 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
5936 };
5937 return true;
5938 }
5939 }
5940
5941 return false;
5942}
5943
5944 bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
5945 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5946 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5947
5948 bool AllowFusionGlobally, HasFMAD, Aggressive;
5949 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5950 return false;
5951
5952 Register Op1 = MI.getOperand(1).getReg();
5953 Register Op2 = MI.getOperand(2).getReg();
5954 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5955 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5956 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5957
5958 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
5959 // prefer to fold the multiply with fewer uses.
5960 bool FirstMulHasFewerUses = true;
5961 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5962 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5963 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5964 FirstMulHasFewerUses = false;
5965
5966 unsigned PreferredFusedOpcode =
5967 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5968
5969 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
5970 if (FirstMulHasFewerUses &&
5971 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5972 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
5973 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5974 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
5975 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5976 {LHS.MI->getOperand(1).getReg(),
5977 LHS.MI->getOperand(2).getReg(), NegZ});
5978 };
5979 return true;
5980 }
5981 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
5982 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5983 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
5984 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5985 Register NegY =
5986 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
5987 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5988 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
5989 };
5990 return true;
5991 }
5992
5993 return false;
5994}
5995
5996 bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
5997 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5998 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5999
6000 bool AllowFusionGlobally, HasFMAD, Aggressive;
6001 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6002 return false;
6003
6004 Register LHSReg = MI.getOperand(1).getReg();
6005 Register RHSReg = MI.getOperand(2).getReg();
6006 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6007
6008 unsigned PreferredFusedOpcode =
6009 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6010
6011 MachineInstr *FMulMI;
6012 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6013 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6014 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6015 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6016 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6017 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6018 Register NegX =
6019 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6020 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6021 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6022 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6023 };
6024 return true;
6025 }
6026
6027 // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
6028 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6029 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6030 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6031 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6032 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6033 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6034 {FMulMI->getOperand(1).getReg(),
6035 FMulMI->getOperand(2).getReg(), LHSReg});
6036 };
6037 return true;
6038 }
6039
6040 return false;
6041}
6042
6043 bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
6044 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6045 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6046
6047 bool AllowFusionGlobally, HasFMAD, Aggressive;
6048 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6049 return false;
6050
6051 Register LHSReg = MI.getOperand(1).getReg();
6052 Register RHSReg = MI.getOperand(2).getReg();
6053 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6054
6055 unsigned PreferredFusedOpcode =
6056 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6057
6058 MachineInstr *FMulMI;
6059 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6060 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6061 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6062 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6063 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6064 Register FpExtX =
6065 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6066 Register FpExtY =
6067 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6068 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6069 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6070 {FpExtX, FpExtY, NegZ});
6071 };
6072 return true;
6073 }
6074
6075 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6076 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6077 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6078 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6079 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6080 Register FpExtY =
6081 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6082 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6083 Register FpExtZ =
6084 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6085 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6086 {NegY, FpExtZ, LHSReg});
6087 };
6088 return true;
6089 }
6090
6091 return false;
6092}
6093
6094 bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
6095 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6096 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6097
6098 bool AllowFusionGlobally, HasFMAD, Aggressive;
6099 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6100 return false;
6101
6102 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6103 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6104 Register LHSReg = MI.getOperand(1).getReg();
6105 Register RHSReg = MI.getOperand(2).getReg();
6106
6107 unsigned PreferredFusedOpcode =
6108 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6109
6110 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6111 MachineIRBuilder &B) {
6112 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6113 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6114 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6115 };
6116
6117 MachineInstr *FMulMI;
6118 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6119 // (fneg (fma (fpext x), (fpext y), z))
6120 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6121 // (fneg (fma (fpext x), (fpext y), z))
6122 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6123 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6124 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6125 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6126 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6127 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6128 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6129 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6130 FMulMI->getOperand(2).getReg(), RHSReg, B);
6131 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6132 };
6133 return true;
6134 }
6135
6136 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6137 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6138 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6139 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6140 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6141 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6142 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6143 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6144 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6145 FMulMI->getOperand(2).getReg(), LHSReg, B);
6146 };
6147 return true;
6148 }
6149
6150 return false;
6151}
6152
6153 bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
6154 unsigned &IdxToPropagate) {
6155 bool PropagateNaN;
6156 switch (MI.getOpcode()) {
6157 default:
6158 return false;
6159 case TargetOpcode::G_FMINNUM:
6160 case TargetOpcode::G_FMAXNUM:
6161 PropagateNaN = false;
6162 break;
6163 case TargetOpcode::G_FMINIMUM:
6164 case TargetOpcode::G_FMAXIMUM:
6165 PropagateNaN = true;
6166 break;
6167 }
6168
6169 auto MatchNaN = [&](unsigned Idx) {
6170 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6171 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6172 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6173 return false;
6174 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6175 return true;
6176 };
6177
6178 return MatchNaN(1) || MatchNaN(2);
6179}
6180
6181 bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
6182 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6183 Register LHS = MI.getOperand(1).getReg();
6184 Register RHS = MI.getOperand(2).getReg();
6185
6186 // Helper lambda to check for opportunities for
6187 // A + (B - A) -> B
6188 // (B - A) + A -> B
6189 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6190 Register Reg;
6191 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6192 Reg == MaybeSameReg;
6193 };
6194 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6195}
6196
6197 bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
6198 Register &MatchInfo) {
6199 // This combine folds the following patterns:
6200 //
6201 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6202 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6203 // into
6204 // x
6205 // if
6206 // k == sizeof(VecEltTy)/2
6207 // type(x) == type(dst)
6208 //
6209 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6210 // into
6211 // x
6212 // if
6213 // type(x) == type(dst)
6214
6215 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6216 LLT DstEltTy = DstVecTy.getElementType();
6217
6218 Register Lo, Hi;
6219
6220 if (mi_match(
6221 MI, MRI,
6222 m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
6223 MatchInfo = Lo;
6224 return MRI.getType(MatchInfo) == DstVecTy;
6225 }
6226
6227 std::optional<ValueAndVReg> ShiftAmount;
6228 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6229 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6230 if (mi_match(
6231 MI, MRI,
6232 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6233 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6234 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6235 MatchInfo = Lo;
6236 return MRI.getType(MatchInfo) == DstVecTy;
6237 }
6238 }
6239
6240 return false;
6241}
6242
6243 bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
6244 Register &MatchInfo) {
6245 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6246 // if type(x) == type(G_TRUNC)
6247 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6248 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6249 return false;
6250
6251 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6252}
6253
6254 bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
6255 Register &MatchInfo) {
6256 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6257 // y if K == size of vector element type
6258 std::optional<ValueAndVReg> ShiftAmt;
6259 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6260 m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
6261 m_GCst(ShiftAmt))))
6262 return false;
6263
6264 LLT MatchTy = MRI.getType(MatchInfo);
6265 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6266 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6267}
6268
6269unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6270 CmpInst::Predicate Pred, LLT DstTy,
6271 SelectPatternNaNBehaviour VsNaNRetVal) const {
6272 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6273 "Expected a NaN behaviour?");
6274 // Choose an opcode based off of legality or the behaviour when one of the
6275 // LHS/RHS may be NaN.
6276 switch (Pred) {
6277 default:
6278 return 0;
6279 case CmpInst::FCMP_UGT:
6280 case CmpInst::FCMP_UGE:
6281 case CmpInst::FCMP_OGT:
6282 case CmpInst::FCMP_OGE:
6283 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6284 return TargetOpcode::G_FMAXNUM;
6285 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6286 return TargetOpcode::G_FMAXIMUM;
6287 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6288 return TargetOpcode::G_FMAXNUM;
6289 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6290 return TargetOpcode::G_FMAXIMUM;
6291 return 0;
6292 case CmpInst::FCMP_ULT:
6293 case CmpInst::FCMP_ULE:
6294 case CmpInst::FCMP_OLT:
6295 case CmpInst::FCMP_OLE:
6296 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6297 return TargetOpcode::G_FMINNUM;
6298 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6299 return TargetOpcode::G_FMINIMUM;
6300 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6301 return TargetOpcode::G_FMINNUM;
6302 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6303 return 0;
6304 return TargetOpcode::G_FMINIMUM;
6305 }
6306}
6307
6308CombinerHelper::SelectPatternNaNBehaviour
6309CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6310 bool IsOrderedComparison) const {
6311 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6312 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6313 // Completely unsafe.
6314 if (!LHSSafe && !RHSSafe)
6315 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6316 if (LHSSafe && RHSSafe)
6317 return SelectPatternNaNBehaviour::RETURNS_ANY;
6318 // An ordered comparison will return false when given a NaN, so it
6319 // returns the RHS.
6320 if (IsOrderedComparison)
6321 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6322 : SelectPatternNaNBehaviour::RETURNS_OTHER;
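// E.g. select (fcmp olt x, y), x, y where only y may be NaN: a NaN y makes
// the ordered compare false and the select produces y, so the candidate
// min/max returns the NaN operand (RETURNS_NAN when only the LHS is safe).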
6323 // An unordered comparison will return true when given a NaN, so it
6324 // returns the LHS.
6325 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6326 : SelectPatternNaNBehaviour::RETURNS_NAN;
6327}
6328
6329bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6330 Register TrueVal, Register FalseVal,
6331 BuildFnTy &MatchInfo) {
6332 // Match: select (fcmp cond x, y) x, y
6333 // select (fcmp cond x, y) y, x
6334 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
6335 LLT DstTy = MRI.getType(Dst);
6336 // Bail out early on pointers, since we'll never want to fold to a min/max.
6337 if (DstTy.isPointer())
6338 return false;
6339 // Match a floating point compare with a less-than/greater-than predicate.
6340 // TODO: Allow multiple users of the compare if they are all selects.
6341 CmpInst::Predicate Pred;
6342 Register CmpLHS, CmpRHS;
6343 if (!mi_match(Cond, MRI,
6344 m_OneNonDBGUse(
6345 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6346 CmpInst::isEquality(Pred))
6347 return false;
6348 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6349 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6350 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6351 return false;
6352 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6353 std::swap(CmpLHS, CmpRHS);
6354 Pred = CmpInst::getSwappedPredicate(Pred);
6355 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6356 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6357 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6358 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6359 }
6360 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6361 return false;
6362 // Decide what type of max/min this should be based off of the predicate.
6363 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6364 if (!Opc || !isLegal({Opc, {DstTy}}))
6365 return false;
6366 // Comparisons between signed zero and zero may have different results...
6367 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6368 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6369 // We don't know if a comparison between two 0s will give us a consistent
6370 // result. Be conservative and only proceed if at least one side is
6371 // non-zero.
6372 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6373 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6374 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6375 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6376 return false;
6377 }
6378 }
6379 MatchInfo = [=](MachineIRBuilder &B) {
6380 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6381 };
6382 return true;
6383}
6384
6385 bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
6386 BuildFnTy &MatchInfo) {
6387 // TODO: Handle integer cases.
6388 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6389 // Condition may be fed by a truncated compare.
6390 Register Cond = MI.getOperand(1).getReg();
6391 Register MaybeTrunc;
6392 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6393 Cond = MaybeTrunc;
6394 Register Dst = MI.getOperand(0).getReg();
6395 Register TrueVal = MI.getOperand(2).getReg();
6396 Register FalseVal = MI.getOperand(3).getReg();
6397 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6398}
6399
6400 bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
6401 BuildFnTy &MatchInfo) {
6402 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6403 // (X + Y) == X --> Y == 0
6404 // (X + Y) != X --> Y != 0
6405 // (X - Y) == X --> Y == 0
6406 // (X - Y) != X --> Y != 0
6407 // (X ^ Y) == X --> Y == 0
6408 // (X ^ Y) != X --> Y != 0
6409 Register Dst = MI.getOperand(0).getReg();
6410 CmpInst::Predicate Pred;
6411 Register X, Y, OpLHS, OpRHS;
6412 bool MatchedSub = mi_match(
6413 Dst, MRI,
6414 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6415 if (MatchedSub && X != OpLHS)
6416 return false;
6417 if (!MatchedSub) {
6418 if (!mi_match(Dst, MRI,
6419 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6420 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6421 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6422 return false;
6423 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6424 }
6425 MatchInfo = [=](MachineIRBuilder &B) {
6426 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6427 B.buildICmp(Pred, Dst, Y, Zero);
6428 };
6429 return CmpInst::isEquality(Pred) && Y.isValid();
6430}
6431
6432 bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
6433 Register ShiftReg = MI.getOperand(2).getReg();
6434 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
6435 auto IsShiftTooBig = [&](const Constant *C) {
6436 auto *CI = dyn_cast<ConstantInt>(C);
6437 return CI && CI->uge(ResTy.getScalarSizeInBits());
6438 };
6439 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
6440}
6441
6442 bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
6443 unsigned LHSOpndIdx = 1;
6444 unsigned RHSOpndIdx = 2;
6445 switch (MI.getOpcode()) {
6446 case TargetOpcode::G_UADDO:
6447 case TargetOpcode::G_SADDO:
6448 case TargetOpcode::G_UMULO:
6449 case TargetOpcode::G_SMULO:
6450 LHSOpndIdx = 2;
6451 RHSOpndIdx = 3;
6452 break;
6453 default:
6454 break;
6455 }
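// The overflow opcodes define a second (carry/overflow) result, so their
// value operands start at index 2 rather than 1.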
6456 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
6457 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
6458 if (!getIConstantVRegVal(LHS, MRI)) {
6459 // Skip commuting if LHS is not a constant. But, LHS may be a
6460 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
6461 // have a constant on the RHS.
6462 if (MRI.getVRegDef(LHS)->getOpcode() !=
6463 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
6464 return false;
6465 }
6466 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
6467 return MRI.getVRegDef(RHS)->getOpcode() !=
6468 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
6469 !getIConstantVRegVal(RHS, MRI);
6470}
6471
6472 bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
6473 Register LHS = MI.getOperand(1).getReg();
6474 Register RHS = MI.getOperand(2).getReg();
6475 std::optional<FPValueAndVReg> ValAndVReg;
6476 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
6477 return false;
6478 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
6479}
6480
6481 void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
6482 Observer.changingInstr(MI);
6483 unsigned LHSOpndIdx = 1;
6484 unsigned RHSOpndIdx = 2;
6485 switch (MI.getOpcode()) {
6486 case TargetOpcode::G_UADDO:
6487 case TargetOpcode::G_SADDO:
6488 case TargetOpcode::G_UMULO:
6489 case TargetOpcode::G_SMULO:
6490 LHSOpndIdx = 2;
6491 RHSOpndIdx = 3;
6492 break;
6493 default:
6494 break;
6495 }
6496 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
6497 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
6498 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
6499 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
6500 Observer.changedInstr(MI);
6501}
6502
6503bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
6504 LLT SrcTy = MRI.getType(Src);
6505 if (SrcTy.isFixedVector())
6506 return isConstantSplatVector(Src, 1, AllowUndefs);
6507 if (SrcTy.isScalar()) {
6508 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6509 return true;
6510 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6511 return IConstant && IConstant->Value == 1;
6512 }
6513 return false; // scalable vector
6514}
6515
6516bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
6517 LLT SrcTy = MRI.getType(Src);
6518 if (SrcTy.isFixedVector())
6519 return isConstantSplatVector(Src, 0, AllowUndefs);
6520 if (SrcTy.isScalar()) {
6521 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6522 return true;
6523 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6524 return IConstant && IConstant->Value == 0;
6525 }
6526 return false; // scalable vector
6527}
6528
6529// Ignores COPYs during conformance checks.
6530// FIXME scalable vectors.
6531bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
6532 bool AllowUndefs) {
6533 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6534 if (!BuildVector)
6535 return false;
6536 unsigned NumSources = BuildVector->getNumSources();
6537
6538 for (unsigned I = 0; I < NumSources; ++I) {
6539 GImplicitDef *ImplicitDef =
6540 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
6541 if (ImplicitDef && AllowUndefs)
6542 continue;
6543 if (ImplicitDef && !AllowUndefs)
6544 return false;
6545 std::optional<ValueAndVReg> IConstant =
6546 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6547 if (IConstant && IConstant->Value == SplatValue)
6548 continue;
6549 return false;
6550 }
6551 return true;
6552}
6553
6554// Ignores COPYs during lookups.
6555// FIXME scalable vectors
6556std::optional<APInt>
6557CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
6558 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6559 if (IConstant)
6560 return IConstant->Value;
6561
6562 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6563 if (!BuildVector)
6564 return std::nullopt;
6565 unsigned NumSources = BuildVector->getNumSources();
6566
6567 std::optional<APInt> Value = std::nullopt;
6568 for (unsigned I = 0; I < NumSources; ++I) {
6569 std::optional<ValueAndVReg> IConstant =
6570 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6571 if (!IConstant)
6572 return std::nullopt;
6573 if (!Value)
6574 Value = IConstant->Value;
6575 else if (*Value != IConstant->Value)
6576 return std::nullopt;
6577 }
6578 return Value;
6579}
6580
6581// FIXME G_SPLAT_VECTOR
6582bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
6583 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6584 if (IConstant)
6585 return true;
6586
6587 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6588 if (!BuildVector)
6589 return false;
6590
6591 unsigned NumSources = BuildVector->getNumSources();
6592 for (unsigned I = 0; I < NumSources; ++I) {
6593 std::optional<ValueAndVReg> IConstant =
6594 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6595 if (!IConstant)
6596 return false;
6597 }
6598 return true;
6599}
6600
6601// TODO: use knownbits to determine zeros
6602bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
6603 BuildFnTy &MatchInfo) {
6604 uint32_t Flags = Select->getFlags();
6605 Register Dest = Select->getReg(0);
6606 Register Cond = Select->getCondReg();
6607 Register True = Select->getTrueReg();
6608 Register False = Select->getFalseReg();
6609 LLT CondTy = MRI.getType(Select->getCondReg());
6610 LLT TrueTy = MRI.getType(Select->getTrueReg());
6611
6612 // We only do this combine for scalar boolean conditions.
6613 if (CondTy != LLT::scalar(1))
6614 return false;
6615
6616 if (TrueTy.isPointer())
6617 return false;
6618
6619 // Both are scalars.
6620 std::optional<ValueAndVReg> TrueOpt =
6621 getIConstantVRegValWithLookThrough(True, MRI);
6622 std::optional<ValueAndVReg> FalseOpt =
6623 getIConstantVRegValWithLookThrough(False, MRI);
6624
6625 if (!TrueOpt || !FalseOpt)
6626 return false;
6627
6628 APInt TrueValue = TrueOpt->Value;
6629 APInt FalseValue = FalseOpt->Value;
6630
6631 // select Cond, 1, 0 --> zext (Cond)
6632 if (TrueValue.isOne() && FalseValue.isZero()) {
6633 MatchInfo = [=](MachineIRBuilder &B) {
6634 B.setInstrAndDebugLoc(*Select);
6635 B.buildZExtOrTrunc(Dest, Cond);
6636 };
6637 return true;
6638 }
6639
6640 // select Cond, -1, 0 --> sext (Cond)
6641 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
6642 MatchInfo = [=](MachineIRBuilder &B) {
6643 B.setInstrAndDebugLoc(*Select);
6644 B.buildSExtOrTrunc(Dest, Cond);
6645 };
6646 return true;
6647 }
6648
6649 // select Cond, 0, 1 --> zext (!Cond)
6650 if (TrueValue.isZero() && FalseValue.isOne()) {
6651 MatchInfo = [=](MachineIRBuilder &B) {
6652 B.setInstrAndDebugLoc(*Select);
6653 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6654 B.buildNot(Inner, Cond);
6655 B.buildZExtOrTrunc(Dest, Inner);
6656 };
6657 return true;
6658 }
6659
6660 // select Cond, 0, -1 --> sext (!Cond)
6661 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
6662 MatchInfo = [=](MachineIRBuilder &B) {
6663 B.setInstrAndDebugLoc(*Select);
6664 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6665 B.buildNot(Inner, Cond);
6666 B.buildSExtOrTrunc(Dest, Inner);
6667 };
6668 return true;
6669 }
6670
6671 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6672 if (TrueValue - 1 == FalseValue) {
6673 MatchInfo = [=](MachineIRBuilder &B) {
6674 B.setInstrAndDebugLoc(*Select);
6675 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6676 B.buildZExtOrTrunc(Inner, Cond);
6677 B.buildAdd(Dest, Inner, False);
6678 };
6679 return true;
6680 }
6681
6682 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6683 if (TrueValue + 1 == FalseValue) {
6684 MatchInfo = [=](MachineIRBuilder &B) {
6685 B.setInstrAndDebugLoc(*Select);
6686 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6687 B.buildSExtOrTrunc(Inner, Cond);
6688 B.buildAdd(Dest, Inner, False);
6689 };
6690 return true;
6691 }
6692
6693 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
6694 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
6695 MatchInfo = [=](MachineIRBuilder &B) {
6696 B.setInstrAndDebugLoc(*Select);
6697 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6698 B.buildZExtOrTrunc(Inner, Cond);
6699 // The shift amount must be scalar.
6700 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
6701 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
6702 B.buildShl(Dest, Inner, ShAmtC, Flags);
6703 };
6704 return true;
6705 }
6706 // select Cond, -1, C --> or (sext Cond), C
6707 if (TrueValue.isAllOnes()) {
6708 MatchInfo = [=](MachineIRBuilder &B) {
6709 B.setInstrAndDebugLoc(*Select);
6710 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6711 B.buildSExtOrTrunc(Inner, Cond);
6712 B.buildOr(Dest, Inner, False, Flags);
6713 };
6714 return true;
6715 }
6716
6717 // select Cond, C, -1 --> or (sext (not Cond)), C
6718 if (FalseValue.isAllOnes()) {
6719 MatchInfo = [=](MachineIRBuilder &B) {
6720 B.setInstrAndDebugLoc(*Select);
6721 Register Not = MRI.createGenericVirtualRegister(CondTy);
6722 B.buildNot(Not, Cond);
6723 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6724 B.buildSExtOrTrunc(Inner, Not);
6725 B.buildOr(Dest, Inner, True, Flags);
6726 };
6727 return true;
6728 }
6729
6730 return false;
6731}
6732
6733// TODO: use knownbits to determine zeros
6734bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
6735 BuildFnTy &MatchInfo) {
6736 uint32_t Flags = Select->getFlags();
6737 Register DstReg = Select->getReg(0);
6738 Register Cond = Select->getCondReg();
6739 Register True = Select->getTrueReg();
6740 Register False = Select->getFalseReg();
6741 LLT CondTy = MRI.getType(Select->getCondReg());
6742 LLT TrueTy = MRI.getType(Select->getTrueReg());
6743
6744 // Boolean or fixed vector of booleans.
6745 if (CondTy.isScalableVector() ||
6746 (CondTy.isFixedVector() &&
6747 CondTy.getElementType().getScalarSizeInBits() != 1) ||
6748 CondTy.getScalarSizeInBits() != 1)
6749 return false;
6750
6751 if (CondTy != TrueTy)
6752 return false;
6753
6754 // select Cond, Cond, F --> or Cond, F
6755 // select Cond, 1, F --> or Cond, F
6756 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
6757 MatchInfo = [=](MachineIRBuilder &B) {
6758 B.setInstrAndDebugLoc(*Select);
6759 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6760 B.buildZExtOrTrunc(Ext, Cond);
6761 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6762 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
6763 };
6764 return true;
6765 }
6766
6767 // select Cond, T, Cond --> and Cond, T
6768 // select Cond, T, 0 --> and Cond, T
6769 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
6770 MatchInfo = [=](MachineIRBuilder &B) {
6771 B.setInstrAndDebugLoc(*Select);
6772 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6773 B.buildZExtOrTrunc(Ext, Cond);
6774 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6775 B.buildAnd(DstReg, Ext, FreezeTrue);
6776 };
6777 return true;
6778 }
6779
6780 // select Cond, T, 1 --> or (not Cond), T
6781 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
6782 MatchInfo = [=](MachineIRBuilder &B) {
6783 B.setInstrAndDebugLoc(*Select);
6784 // First the not.
6785 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6786 B.buildNot(Inner, Cond);
6787 // Then an ext to match the destination register.
6788 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6789 B.buildZExtOrTrunc(Ext, Inner);
6790 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6791 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
6792 };
6793 return true;
6794 }
6795
6796 // select Cond, 0, F --> and (not Cond), F
6797 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
6798 MatchInfo = [=](MachineIRBuilder &B) {
6799 B.setInstrAndDebugLoc(*Select);
6800 // First the not.
6801 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6802 B.buildNot(Inner, Cond);
6803 // Then an ext to match the destination register.
6804 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6805 B.buildZExtOrTrunc(Ext, Inner);
6806 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6807 B.buildAnd(DstReg, Ext, FreezeFalse);
6808 };
6809 return true;
6810 }
6811
6812 return false;
6813}
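// Illustrative example for the boolean-select folds above (hand-written
// generic MIR; register names are invented, not taken from a test):
//   %sel:_(s1) = G_SELECT %c:_(s1), %c, %f:_(s1)
// hits the "select Cond, Cond, F" case and is rebuilt (modulo the copy that
// buildZExtOrTrunc emits for identical types) as
//   %fr:_(s1) = G_FREEZE %f
//   %sel:_(s1) = G_OR %c, %fr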
6814
6815bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
6816 BuildFnTy &MatchInfo) {
6817 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
6818 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
6819
6820 Register DstReg = Select->getReg(0);
6821 Register True = Select->getTrueReg();
6822 Register False = Select->getFalseReg();
6823 LLT DstTy = MRI.getType(DstReg);
6824
6825 if (DstTy.isPointer())
6826 return false;
6827
6828 // We want to fold the icmp and replace the select.
6829 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
6830 return false;
6831
6832 CmpInst::Predicate Pred = Cmp->getCond();
6833 // We need a relational (greater-than or less-than) predicate for the
6834 // min/max canonicalization; equality predicates cannot be used.
6835 if (CmpInst::isEquality(Pred))
6836 return false;
6837
6838 Register CmpLHS = Cmp->getLHSReg();
6839 Register CmpRHS = Cmp->getRHSReg();
6840
6841 // We can swap CmpLHS and CmpRHS for a higher hit rate.
6842 if (True == CmpRHS && False == CmpLHS) {
6843 std::swap(CmpLHS, CmpRHS);
6844 Pred = CmpInst::getSwappedPredicate(Pred);
6845 }
6846
6847 // (icmp X, Y) ? X : Y -> integer minmax.
6848 // see matchSelectPattern in ValueTracking.
6849 // Legality between G_SELECT and integer minmax can differ.
6850 if (True != CmpLHS || False != CmpRHS)
6851 return false;
6852
6853 switch (Pred) {
6854 case ICmpInst::ICMP_UGT:
6855 case ICmpInst::ICMP_UGE: {
6856 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
6857 return false;
6858 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
6859 return true;
6860 }
6861 case ICmpInst::ICMP_SGT:
6862 case ICmpInst::ICMP_SGE: {
6863 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
6864 return false;
6865 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
6866 return true;
6867 }
6868 case ICmpInst::ICMP_ULT:
6869 case ICmpInst::ICMP_ULE: {
6870 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
6871 return false;
6872 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
6873 return true;
6874 }
6875 case ICmpInst::ICMP_SLT:
6876 case ICmpInst::ICMP_SLE: {
6877 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
6878 return false;
6879 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
6880 return true;
6881 }
6882 default:
6883 return false;
6884 }
6885}
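// Illustrative example for the min/max combine above (hand-written generic
// MIR; register names are invented, not taken from a test):
//   %c:_(s1) = G_ICMP intpred(ugt), %x:_(s64), %y:_(s64)
//   %sel:_(s64) = G_SELECT %c, %x, %y
// becomes, provided G_UMAX is legal for s64 (or we are before the legalizer):
//   %sel:_(s64) = G_UMAX %x, %y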
6886
6887bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
6888 GSelect *Select = cast<GSelect>(&MI);
6889
6890 if (tryFoldSelectOfConstants(Select, MatchInfo))
6891 return true;
6892
6893 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
6894 return true;
6895
6896 return false;
6897}
6898
6899/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
6900/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
6901/// into a single comparison using range-based reasoning.
6902/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
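///
/// Two illustrative cases (constants chosen by hand, not from a test):
///   (icmp ne X, 0) && (icmp ne X, 1): the inverted ranges union to [0, 2),
///   which is inverted back for the 'and' and emitted as a single compare
///   equivalent to (icmp uge X, 2).
///   (icmp eq X, 5) || (icmp eq X, 7): the ranges [5, 6) and [7, 8) differ
///   only in one bit, so the CreateMask path masks that bit off and emits
///   the equivalent of (icmp eq (and X, ~2), 5).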
6903bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
6904 BuildFnTy &MatchInfo) {
6905 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6906 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6907 Register DstReg = Logic->getReg(0);
6908 Register LHS = Logic->getLHSReg();
6909 Register RHS = Logic->getRHSReg();
6910 unsigned Flags = Logic->getFlags();
6911
6912 // We need a G_ICMP on the LHS register.
6913 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
6914 if (!Cmp1)
6915 return false;
6916
6917 // We need a G_ICMP on the RHS register.
6918 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
6919 if (!Cmp2)
6920 return false;
6921
6922 // We want to fold the icmps.
6923 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
6924 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
6925 return false;
6926
6927 APInt C1;
6928 APInt C2;
6929 std::optional<ValueAndVReg> MaybeC1 =
6930 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
6931 if (!MaybeC1)
6932 return false;
6933 C1 = MaybeC1->Value;
6934
6935 std::optional<ValueAndVReg> MaybeC2 =
6936 getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
6937 if (!MaybeC2)
6938 return false;
6939 C2 = MaybeC2->Value;
6940
6941 Register R1 = Cmp1->getLHSReg();
6942 Register R2 = Cmp2->getLHSReg();
6943 CmpInst::Predicate Pred1 = Cmp1->getCond();
6944 CmpInst::Predicate Pred2 = Cmp2->getCond();
6945 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
6946 LLT CmpOperandTy = MRI.getType(R1);
6947
6948 if (CmpOperandTy.isPointer())
6949 return false;
6950
6951 // We build ands, adds, and constants of type CmpOperandTy.
6952 // They must be legal to build.
6953 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
6954 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
6955 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
6956 return false;
6957
6958 // Look through an add of a constant offset on R1, R2, or both operands. This
6959 // allows us to turn the R + C' < C'' range idiom into a proper range.
6960 std::optional<APInt> Offset1;
6961 std::optional<APInt> Offset2;
6962 if (R1 != R2) {
6963 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
6964 std::optional<ValueAndVReg> MaybeOffset1 =
6965 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6966 if (MaybeOffset1) {
6967 R1 = Add->getLHSReg();
6968 Offset1 = MaybeOffset1->Value;
6969 }
6970 }
6971 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
6972 std::optional<ValueAndVReg> MaybeOffset2 =
6973 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6974 if (MaybeOffset2) {
6975 R2 = Add->getLHSReg();
6976 Offset2 = MaybeOffset2->Value;
6977 }
6978 }
6979 }
6980
6981 if (R1 != R2)
6982 return false;
6983
6984 // We calculate the icmp ranges including maybe offsets.
6985 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
6986 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
6987 if (Offset1)
6988 CR1 = CR1.subtract(*Offset1);
6989
6990 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
6991 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
6992 if (Offset2)
6993 CR2 = CR2.subtract(*Offset2);
6994
6995 bool CreateMask = false;
6996 APInt LowerDiff;
6997 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
6998 if (!CR) {
6999 // We need non-wrapping ranges.
7000 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7001 return false;
7002
7003 // Check whether we have equal-size ranges that only differ by one bit.
7004 // In that case we can apply a mask to map one range onto the other.
7005 LowerDiff = CR1.getLower() ^ CR2.getLower();
7006 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7007 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7008 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7009 CR1Size != CR2.getUpper() - CR2.getLower())
7010 return false;
7011
7012 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7013 CreateMask = true;
7014 }
7015
7016 if (IsAnd)
7017 CR = CR->inverse();
7018
7019 CmpInst::Predicate NewPred;
7020 APInt NewC, Offset;
7021 CR->getEquivalentICmp(NewPred, NewC, Offset);
7022
7023 // We take the result type of one of the original icmps, CmpTy, for
7024 // the icmp to be built. The operand type, CmpOperandTy, is used for
7025 // the other instructions and constants to be built. The parameter and
7026 // result types are the same for add and and. CmpTy and the type of
7027 // DstReg might differ, which is why we zext or trunc the icmp result
7028 // into the destination register.
7029
7030 MatchInfo = [=](MachineIRBuilder &B) {
7031 if (CreateMask && Offset != 0) {
7032 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7033 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7034 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7035 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7036 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7037 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7038 B.buildZExtOrTrunc(DstReg, ICmp);
7039 } else if (CreateMask && Offset == 0) {
7040 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7041 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7042 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7043 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7044 B.buildZExtOrTrunc(DstReg, ICmp);
7045 } else if (!CreateMask && Offset != 0) {
7046 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7047 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7048 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7049 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7050 B.buildZExtOrTrunc(DstReg, ICmp);
7051 } else if (!CreateMask && Offset == 0) {
7052 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7053 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7054 B.buildZExtOrTrunc(DstReg, ICmp);
7055 } else {
7056 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7057 }
7058 };
7059 return true;
7060}
7061
7062bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7063 BuildFnTy &MatchInfo) {
7064 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7065 Register DestReg = Logic->getReg(0);
7066 Register LHS = Logic->getLHSReg();
7067 Register RHS = Logic->getRHSReg();
7068 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7069
7070 // We need a compare on the LHS register.
7071 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7072 if (!Cmp1)
7073 return false;
7074
7075 // We need a compare on the RHS register.
7076 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7077 if (!Cmp2)
7078 return false;
7079
7080 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7081 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7082
7083 // We build one fcmp, want to fold the fcmps, replace the logic op,
7084 // and the fcmps must have the same shape.
7085 if (!isLegalOrBeforeLegalizer(
7086 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7087 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7088 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7089 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7090 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7091 return false;
7092
7093 CmpInst::Predicate PredL = Cmp1->getCond();
7094 CmpInst::Predicate PredR = Cmp2->getCond();
7095 Register LHS0 = Cmp1->getLHSReg();
7096 Register LHS1 = Cmp1->getRHSReg();
7097 Register RHS0 = Cmp2->getLHSReg();
7098 Register RHS1 = Cmp2->getRHSReg();
7099
7100 if (LHS0 == RHS1 && LHS1 == RHS0) {
7101 // Swap RHS operands to match LHS.
7102 PredR = CmpInst::getSwappedPredicate(PredR);
7103 std::swap(RHS0, RHS1);
7104 }
7105
7106 if (LHS0 == RHS0 && LHS1 == RHS1) {
7107 // We determine the new predicate.
7108 unsigned CmpCodeL = getFCmpCode(PredL);
7109 unsigned CmpCodeR = getFCmpCode(PredR);
7110 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7111 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7112 MatchInfo = [=](MachineIRBuilder &B) {
7113 // The fcmp predicates fill the lower part of the enum.
7114 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7115 if (Pred == FCmpInst::FCMP_FALSE &&
7116 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7117 auto False = B.buildConstant(CmpTy, 0);
7118 B.buildZExtOrTrunc(DestReg, False);
7119 } else if (Pred == FCmpInst::FCMP_TRUE &&
7120 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7121 auto True =
7122 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7123 CmpTy.isVector() /*isVector*/,
7124 true /*isFP*/));
7125 B.buildZExtOrTrunc(DestReg, True);
7126 } else { // We take the predicate without predicate optimizations.
7127 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7128 B.buildZExtOrTrunc(DestReg, Cmp);
7129 }
7130 };
7131 return true;
7132 }
7133
7134 return false;
7135}
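// Illustrative example for the fcmp fold above (hand-written generic MIR;
// register names are invented, not taken from a test):
//   %a:_(s1) = G_FCMP floatpred(olt), %x:_(s32), %y:_(s32)
//   %b:_(s1) = G_FCMP floatpred(ogt), %x:_(s32), %y:_(s32)
//   %r:_(s1) = G_AND %a, %b
// The predicate codes combine as olt & ogt == FCMP_FALSE, so the whole
// expression folds to a zero constant of the compare type.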
7136
7137bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
7138 GAnd *And = cast<GAnd>(&MI);
7139
7140 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7141 return true;
7142
7143 if (tryFoldLogicOfFCmps(And, MatchInfo))
7144 return true;
7145
7146 return false;
7147}
7148
7149bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
7150 GOr *Or = cast<GOr>(&MI);
7151
7152 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7153 return true;
7154
7155 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7156 return true;
7157
7158 return false;
7159}
7160
7161bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
7162 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7163
7164 // Addo has no flags
7165 Register Dst = Add->getReg(0);
7166 Register Carry = Add->getReg(1);
7167 Register LHS = Add->getLHSReg();
7168 Register RHS = Add->getRHSReg();
7169 bool IsSigned = Add->isSigned();
7170 LLT DstTy = MRI.getType(Dst);
7171 LLT CarryTy = MRI.getType(Carry);
7172
7173 // If the carry is dead, fold addo -> add + undef carry.
7174 if (MRI.use_nodbg_empty(Carry) &&
7175 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7176 MatchInfo = [=](MachineIRBuilder &B) {
7177 B.buildAdd(Dst, LHS, RHS);
7178 B.buildUndef(Carry);
7179 };
7180 return true;
7181 }
7182
7183 // Canonicalize constant to RHS.
7184 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7185 if (IsSigned) {
7186 MatchInfo = [=](MachineIRBuilder &B) {
7187 B.buildSAddo(Dst, Carry, RHS, LHS);
7188 };
7189 return true;
7190 }
7191 // !IsSigned
7192 MatchInfo = [=](MachineIRBuilder &B) {
7193 B.buildUAddo(Dst, Carry, RHS, LHS);
7194 };
7195 return true;
7196 }
7197
7198 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7199 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7200
7201 // Fold addo(c1, c2) -> c3, carry.
7202 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7203 isConstantLegalOrBeforeLegalizer(CarryTy)) {
7204 bool Overflow;
7205 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7206 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7207 MatchInfo = [=](MachineIRBuilder &B) {
7208 B.buildConstant(Dst, Result);
7209 B.buildConstant(Carry, Overflow);
7210 };
7211 return true;
7212 }
7213
7214 // Fold (addo x, 0) -> x, no carry
7215 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7216 MatchInfo = [=](MachineIRBuilder &B) {
7217 B.buildCopy(Dst, LHS);
7218 B.buildConstant(Carry, 0);
7219 };
7220 return true;
7221 }
7222
7223 // Given 2 constant operands whose sum does not overflow:
7224 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7225 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7226 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7227 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7228 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7229 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7230 std::optional<APInt> MaybeAddRHS =
7231 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7232 if (MaybeAddRHS) {
7233 bool Overflow;
7234 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7235 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7236 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7237 if (IsSigned) {
7238 MatchInfo = [=](MachineIRBuilder &B) {
7239 auto ConstRHS = B.buildConstant(DstTy, NewC);
7240 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7241 };
7242 return true;
7243 }
7244 // !IsSigned
7245 MatchInfo = [=](MachineIRBuilder &B) {
7246 auto ConstRHS = B.buildConstant(DstTy, NewC);
7247 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7248 };
7249 return true;
7250 }
7251 }
7252 };
7253
7254 // We try to combine addo to non-overflowing add.
7255 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7256 !isConstantLegalOrBeforeLegalizer(CarryTy))
7257 return false;
7258
7259 // We try to combine uaddo to non-overflowing add.
7260 if (!IsSigned) {
7261 ConstantRange CRLHS =
7262 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
7263 ConstantRange CRRHS =
7264 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
7265
7266 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7267 case ConstantRange::OverflowResult::MayOverflow:
7268 return false;
7269 case ConstantRange::OverflowResult::NeverOverflows: {
7270 MatchInfo = [=](MachineIRBuilder &B) {
7271 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7272 B.buildConstant(Carry, 0);
7273 };
7274 return true;
7275 }
7276 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7277 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7278 MatchInfo = [=](MachineIRBuilder &B) {
7279 B.buildAdd(Dst, LHS, RHS);
7280 B.buildConstant(Carry, 1);
7281 };
7282 return true;
7283 }
7284 }
7285 return false;
7286 }
7287
7288 // We try to combine saddo to non-overflowing add.
7289
7290 // If LHS and RHS each have at least two sign bits, then there is no signed
7291 // overflow.
7292 if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
7293 MatchInfo = [=](MachineIRBuilder &B) {
7294 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7295 B.buildConstant(Carry, 0);
7296 };
7297 return true;
7298 }
7299
7300 ConstantRange CRLHS =
7301 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
7302 ConstantRange CRRHS =
7303 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
7304
7305 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7306 case ConstantRange::OverflowResult::MayOverflow:
7307 return false;
7308 case ConstantRange::OverflowResult::NeverOverflows: {
7309 MatchInfo = [=](MachineIRBuilder &B) {
7310 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7311 B.buildConstant(Carry, 0);
7312 };
7313 return true;
7314 }
7315 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7316 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7317 MatchInfo = [=](MachineIRBuilder &B) {
7318 B.buildAdd(Dst, LHS, RHS);
7319 B.buildConstant(Carry, 1);
7320 };
7321 return true;
7322 }
7323 }
7324
7325 return false;
7326}
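// Illustrative example for the addo combines above (hand-written generic
// MIR; register names and the 16-bit masks are invented, not from a test):
//   %m:_(s32) = G_CONSTANT i32 65535
//   %lhs:_(s32) = G_AND %x, %m
//   %rhs:_(s32) = G_AND %y, %m
//   %sum:_(s32), %ov:_(s1) = G_UADDO %lhs, %rhs
// Known bits prove the unsigned add can never overflow, so it is rebuilt as
//   %sum:_(s32) = nuw G_ADD %lhs, %rhs
//   %ov:_(s1) = G_CONSTANT i1 0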
7327
7328void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
7329 BuildFnTy &MatchInfo) {
7330 MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
7331 MatchInfo(Builder);
7332 Root->eraseFromParent();
7333}
7334
7335bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) {
7336 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
7337 return getTargetLowering().isBeneficialToExpandPowI(Exponent, OptForSize);
7338}
7339
7340void CombinerHelper::applyExpandFPowI(MachineInstr &MI, int64_t Exponent) {
7341 auto [Dst, Base] = MI.getFirst2Regs();
7342 LLT Ty = MRI.getType(Dst);
7343 int64_t ExpVal = Exponent;
7344
7345 if (ExpVal == 0) {
7346 Builder.buildFConstant(Dst, 1.0);
7347 MI.removeFromParent();
7348 return;
7349 }
7350
7351 if (ExpVal < 0)
7352 ExpVal = -ExpVal;
7353
7354 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
7355 // to generate the multiply sequence. There are more optimal ways to do this
7356 // (for example, powi(x,15) generates one more multiply than it should), but
7357 // this has the benefit of being both really simple and much better than a
7358 // libcall.
7359 std::optional<SrcOp> Res;
7360 SrcOp CurSquare = Base;
7361 while (ExpVal > 0) {
7362 if (ExpVal & 1) {
7363 if (!Res)
7364 Res = CurSquare;
7365 else
7366 Res = Builder.buildFMul(Ty, *Res, CurSquare);
7367 }
7368
7369 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
7370 ExpVal >>= 1;
7371 }
7372
7373 // If the original exponent was negative, invert the result, producing
7374 // 1/(x*x*x).
7375 if (Exponent < 0)
7376 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
7377 MI.getFlags());
7378
7379 Builder.buildCopy(Dst, *Res);
7380 MI.eraseFromParent();
7381}
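// Worked example for the expansion above (illustrative): for Exponent == 5
// (binary 101) the loop repeatedly squares the base (Base^2, Base^4, ...)
// and multiplies the squares whose bit is set into the result:
//   Res = Base            // bit 0
//   Res = Res * Base^4    // bit 2  ->  Base^5
// A negative exponent additionally emits a final G_FDIV of 1.0 by Res.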
7382
7383bool CombinerHelper::matchFoldAPlusC1MinusC2(const MachineInstr &MI,
7384 BuildFnTy &MatchInfo) {
7385 // fold (A+C1)-C2 -> A+(C1-C2)
7386 const GSub *Sub = cast<GSub>(&MI);
7387 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
7388
7389 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
7390 return false;
7391
7392 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
7393 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
7394
7395 Register Dst = Sub->getReg(0);
7396 LLT DstTy = MRI.getType(Dst);
7397
7398 MatchInfo = [=](MachineIRBuilder &B) {
7399 auto Const = B.buildConstant(DstTy, C1 - C2);
7400 B.buildAdd(Dst, Add->getLHSReg(), Const);
7401 };
7402
7403 return true;
7404}
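// Worked numeric example for the fold above (illustrative): with C1 == 7 and
// C2 == 3, (A + 7) - 3 is rebuilt as A + (7 - 3), i.e. a single G_ADD of A
// and the fresh constant 4.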
7405
7406bool CombinerHelper::matchFoldC2MinusAPlusC1(const MachineInstr &MI,
7407 BuildFnTy &MatchInfo) {
7408 // fold C2-(A+C1) -> (C2-C1)-A
7409 const GSub *Sub = cast<GSub>(&MI);
7410 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
7411
7412 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
7413 return false;
7414
7415 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
7416 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
7417
7418 Register Dst = Sub->getReg(0);
7419 LLT DstTy = MRI.getType(Dst);
7420
7421 MatchInfo = [=](MachineIRBuilder &B) {
7422 auto Const = B.buildConstant(DstTy, C2 - C1);
7423 B.buildSub(Dst, Const, Add->getLHSReg());
7424 };
7425
7426 return true;
7427}
7428
7429bool CombinerHelper::matchFoldAMinusC1MinusC2(const MachineInstr &MI,
7430 BuildFnTy &MatchInfo) {
7431 // fold (A-C1)-C2 -> A-(C1+C2)
7432 const GSub *Sub1 = cast<GSub>(&MI);
7433 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
7434
7435 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
7436 return false;
7437
7438 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
7439 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
7440
7441 Register Dst = Sub1->getReg(0);
7442 LLT DstTy = MRI.getType(Dst);
7443
7444 MatchInfo = [=](MachineIRBuilder &B) {
7445 auto Const = B.buildConstant(DstTy, C1 + C2);
7446 B.buildSub(Dst, Sub2->getLHSReg(), Const);
7447 };
7448
7449 return true;
7450}
7451
7452bool CombinerHelper::matchFoldC1Minus2MinusC2(const MachineInstr &MI,
7453 BuildFnTy &MatchInfo) {
7454 // fold (C1-A)-C2 -> (C1-C2)-A
7455 const GSub *Sub1 = cast<GSub>(&MI);
7456 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
7457
7458 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
7459 return false;
7460
7461 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
7462 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
7463
7464 Register Dst = Sub1->getReg(0);
7465 LLT DstTy = MRI.getType(Dst);
7466
7467 MatchInfo = [=](MachineIRBuilder &B) {
7468 auto Const = B.buildConstant(DstTy, C1 - C2);
7469 B.buildSub(Dst, Const, Sub2->getRHSReg());
7470 };
7471
7472 return true;
7473}
7474
7475bool CombinerHelper::matchFoldAMinusC1PlusC2(const MachineInstr &MI,
7476 BuildFnTy &MatchInfo) {
7477 // fold ((A-C1)+C2) -> (A+(C2-C1))
7478 const GAdd *Add = cast<GAdd>(&MI);
7479 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
7480
7481 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
7482 return false;
7483
7484 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
7485 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
7486
7487 Register Dst = Add->getReg(0);
7488 LLT DstTy = MRI.getType(Dst);
7489
7490 MatchInfo = [=](MachineIRBuilder &B) {
7491 auto Const = B.buildConstant(DstTy, C2 - C1);
7492 B.buildAdd(Dst, Sub->getLHSReg(), Const);
7493 };
7494
7495 return true;
7496}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
static const LLT S1
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition: APFloat.h:1368
bool isNaN() const
Definition: APFloat.h:1358
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1152
APInt bitcastToAPInt() const
Definition: APFloat.h:1266
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1446
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
int32_t exactLogBase2() const
Definition: APInt.h:1739
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:812
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1596
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1555
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
unsigned countl_one() const
Count the number of leading one bits.
Definition: APInt.h:1572
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1520
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1613
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition: InstrTypes.h:997
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:774
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:787
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:772
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:761
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:769
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:764
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:785
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:783
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:770
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:759
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:909
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
static bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyUDivByConst(MachineInstr &MI)
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops)
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
bool matchPtrAddZero(MachineInstr &MI)
}
bool matchAllExplicitUsesAreUndef(MachineInstr &MI)
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx)
Delete MI and replace all of its uses with its OpIdx-th operand.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUDivByConst(MachineInstr &MI)
Combine G_UDIV by constant into a multiply by magic constant.
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI)
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchShiftsTooBig(MachineInstr &MI)
Match shifts greater or equal to the bitwidth of the operation.
bool tryCombineCopy(MachineInstr &MI)
If MI is COPY, try to combine it.
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
bool matchUndefStore(MachineInstr &MI)
Return true if a G_STORE instruction MI is storing an undef value.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchRedundantSExtInReg(MachineInstr &MI)
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo)
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent)
Match FPOWI if it's safe to extend it into a series of multiplications.
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo)
Do constant FP folding when opportunities are exposed after MIR building.
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI)
void applyCommuteBinOpOperands(MachineInstr &MI)
bool matchBinOpSameVal(MachineInstr &MI)
Optimize (x op x) -> x.
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineCopy(MachineInstr &MI)
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx)
Return true if a G_SELECT instruction MI has a constant comparison.
void eraseInst(MachineInstr &MI)
Erase MI.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchAddSubSameReg(MachineInstr &MI, Register &Src)
Transform G_ADD(x, G_SUB(y, x)) to y.
void applyRotateOutOfRange(MachineInstr &MI)
const DataLayout & getDataLayout() const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchRotateOutOfRange(MachineInstr &MI)
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops)
Replace MI with a concat_vectors with Ops.
const TargetLowering & getTargetLowering() const
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
void applyPtrAddZero(MachineInstr &MI)
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
void setRegBank(Register Reg, const RegisterBank *RegBank)
Set the register bank of Reg.
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement)
void replaceInstWithConstant(MachineInstr &MI, int64_t C)
Replace an instruction with a G_CONSTANT with value C.
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
Match ashr (shl x, C), C -> sext_inreg (C)
bool tryCombineExtendingLoads(MachineInstr &MI)
If MI is extend that consumes the result of a load, try to combine it.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applySDivByConst(MachineInstr &MI)
bool matchUndefSelectCmp(MachineInstr &MI)
Return true if a G_SELECT instruction MI has an undef comparison.
void replaceInstWithUndef(MachineInstr &MI)
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantOr(MachineInstr &MI, Register &Replacement)
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is undef.
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void replaceInstWithFConstant(MachineInstr &MI, double C)
Replace an instruction with a G_FCONSTANT with value C.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2)
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
Fold (shift (shift base, x), y) -> (shift base (x+y))
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*MULO x, 0) -> 0 + no carry out.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement)
Delete MI and replace all of its uses with Replacement.
bool matchFunnelShiftToRotate(MachineInstr &MI)
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
Combine inverting a result of a compare into the opposite cond code.
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is known to be a power of 2.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo)
void applyCombineCopy(MachineInstr &MI)
bool matchAnyExplicitUseIsUndef(MachineInstr &MI)
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
bool matchSextTruncSextLoad(MachineInstr &MI)
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
GISelKnownBits * KB
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
MachineInstr * buildSDivUsingMul(MachineInstr &MI)
Given an G_SDIV MI expressing a signed divide by constant, return an expression that implements it by...
void applySDivByPow2(MachineInstr &MI)
void applyFunnelShiftConstantModulo(MachineInstr &MI)
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool isPreLegalize() const
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo)
Match (and (load x), mask) -> zextload x.
bool matchConstantOp(const MachineOperand &MOP, int64_t C)
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ands.
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg)
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool matchConstantFPOp(const MachineOperand &MOP, double C)
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
Return true if MI is a G_ADD which can be simplified to a G_SUB.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Optimize memcpy intrinsics et al, e.g.
bool matchSelectSameVal(MachineInstr &MI)
Optimize (cond ? x : x) -> x.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst)
Transform fp_instr(cst) to constant result of the fp operation.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
LLVMContext & getContext() const
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo)
Try to reassociate to reassociate operands of a commutative binop.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool tryEmitMemcpyInline(MachineInstr &MI)
Emit loads and stores that perform the given memcpy.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info)
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData)
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo)
Constant fold G_FMA/G_FMAD.
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent)
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
bool isLegal(const LegalityQuery &Query) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine selects.
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo)
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg)
Transform anyext(trunc(x)) to x.
void applySimplifyURemByPow2(MachineInstr &MI)
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
MachineRegisterInfo & MRI
void applyUMulHToLShr(MachineInstr &MI)
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo)
Match expression trees of the form.
bool matchShuffleToExtract(MachineInstr &MI)
bool matchUndefShuffleVectorMask(MachineInstr &MI)
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
Transform a multiply by a power-of-2 value to a left shift.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo)
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo)
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo)
Fold away a merge of an unmerge of the corresponding values.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI)
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx)
Checks if constant at ConstIdx is larger than MI 's bitwidth.
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo)
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchDivByPow2(MachineInstr &MI, bool IsSigned)
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchUMulHToLShr(MachineInstr &MI)
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI dominates UseMI.
MachineInstr * buildUDivUsingMul(MachineInstr &MI)
Given an G_UDIV MI expressing a divide by constant, return an expression that implements it by multip...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg)
Transform zext(trunc(x)) to x.
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData)
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false)
const LegalizerInfo * LI
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
void applyShuffleToExtract(MachineInstr &MI)
MachineDominatorTree * MDT
bool matchSDivByConst(MachineInstr &MI)
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
const RegisterBankInfo * RBI
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
const TargetRegisterInfo * TRI
bool tryCombineShuffleVector(MachineInstr &MI)
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg)
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo)
GISelChangeObserver & Observer
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Match sext_inreg(load p), imm -> sextload p.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ors.
void applyFunnelShiftToRotate(MachineInstr &MI)
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine addos.
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg)
Transform PtrToInt(IntToPtr(x)) to x.
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal)
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchCommuteConstantToRHS(MachineInstr &MI)
Match constant LHS ops that should be commuted.
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo)
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Replace MI with a series of instructions described in MatchInfo.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
MachineIRBuilder & Builder
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine select to integer min/max.
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (and x, n), k -> ubfx x, pos, width.
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate commutative binary operations like G_ADD.
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo)
Push a binary operator through a select on constants.
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
const MachineFunction & getMachineFunction() const
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is zero.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyUDivByPow2(MachineInstr &MI)
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
void applySextTruncSextLoad(MachineInstr &MI)
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
bool matchCommuteFPConstantToRHS(MachineInstr &MI)
Match constant LHS FP ops that should be commuted.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APFloat & getValue() const
Definition: Constants.h:313
const APFloat & getValueAPF() const
Definition: Constants.h:312
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This class represents a range of values.
Definition: ConstantRange.h:47
std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isBigEndian() const
Definition: DataLayout.h:196
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:226
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:380
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_FCMP.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
void finishedChangingAllUsesOfReg()
All instructions reported as changing by changingAllUsesOfReg() have finished being changed.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg)
All the instructions using the given register are being changed.
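A minimal sketch of the usual changingInstr/changedInstr bracketing around an in-place mutation; the helper and its choice of operand index are illustrative assumptions.

#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/Register.h"
using namespace llvm;

// Rewrite MI's first use operand to NewSrc, notifying the observer before and
// after so any listeners (e.g. a combiner worklist) see the change.
static void replaceFirstUse(GISelChangeObserver &Observer, MachineInstr &MI,
                            Register NewSrc) {
  Observer.changingInstr(MI); // about to mutate MI
  MI.getOperand(1).setReg(NewSrc);
  Observer.changedInstr(MI);  // mutation finished
}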
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
KnownBits getKnownBits(Register R)
APInt getKnownZeroes(Register R)
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents a G_IMPLICIT_DEF.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents an integer subtraction.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:182
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition: LowLevelType.h:178
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
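A small sketch of how the LLT constructors and queries listed above are typically used; the header path is assumed for a recent tree and the concrete sizes are arbitrary.

#include "llvm/CodeGenTypes/LowLevelType.h" // assumed location of LLT
#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Build a 32-bit scalar and a <4 x s32> vector, then query a few properties.
static void lltExamples() {
  LLT S32 = LLT::scalar(32);
  LLT V4S32 = LLT::vector(ElementCount::getFixed(4), 32);
  (void)S32.isScalar();              // true
  (void)V4S32.getElementType();      // s32
  (void)V4S32.changeElementSize(64); // <4 x s64>
}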
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
bool isLegalOrCustom(const LegalityQuery &Query) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LLVMContext & getContext() const
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ Src0.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildFDiv(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FDIV Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
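A minimal sketch showing how a few of the build* helpers above compose; the power-of-two test and the helper name are illustrative, not a combine from this file, and a scalar type is assumed.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Emit an i1 test for "X is zero or a power of two": (X & (X - 1)) == 0.
// B is assumed to already have its insertion point and debug location set.
static Register buildIsPow2OrZero(MachineIRBuilder &B, LLT Ty, Register X) {
  auto One = B.buildConstant(Ty, 1);
  auto Mask = B.buildAnd(Ty, X, B.buildSub(Ty, X, One));
  auto Zero = B.buildConstant(Ty, 0);
  return B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Mask, Zero).getReg(0);
}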
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:396
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:733
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:391
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
void setRegClassOrRegBank(Register Reg, const RegClassOrRegBank &RCOrRB)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
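A minimal sketch of the kind of MachineRegisterInfo queries a combine performs before rewriting; the specific single-use, 32-bit-scalar check is an illustrative assumption.

#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// True if Reg has a defining instruction, exactly one non-debug user, and a
// 32-bit scalar low-level type.
static bool isSingleUse32BitScalar(const MachineRegisterInfo &MRI,
                                   Register Reg) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  LLT Ty = MRI.getType(Reg);
  return Def && MRI.hasOneNonDBGUse(Reg) && Ty.isScalar() &&
         Ty.getScalarSizeInBits() == 32;
}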
This class implements the register bank concept.
Definition: RegisterBank.h:28
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition: SmallPtrSet.h:95
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const
Given the generic extension instruction ExtMI, returns true if this extension is a likely candidate f...
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
operand_type_match m_Reg()
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(int64_t RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
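A minimal sketch of how mi_match composes the matchers listed above; the helper, its outputs, and the pattern are illustrative.

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
using namespace MIPatternMatch;

// Match Dst = G_ADD X, C (in either operand order, since G_ADD is
// commutative) and bind X and the constant C on success.
static bool matchAddOfConstant(Register Dst, const MachineRegisterInfo &MRI,
                               Register &X, APInt &C) {
  return mi_match(Dst, MRI, m_GAdd(m_Reg(X), m_ICst(C)));
}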
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition: Utils.cpp:1440
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1981
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:646
static double log2(double V)
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:459
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:295
std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1400
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1553
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:734
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to have exactly one bit set when defined.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1523
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1535
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:486
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1568
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition: Utils.cpp:1600
std::function< void(MachineIRBuilder &)> BuildFnTy
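A minimal sketch of the match/apply split built around BuildFnTy: a match function records the rewrite as a lambda, and the apply step later runs it with a builder positioned at the instruction being replaced. The captured names are illustrative.

#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" // assumed home of BuildFnTy
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Record "Dst = X << Log2C" to be emitted later by the apply step.
static void captureShlRewrite(BuildFnTy &MatchInfo, Register Dst, Register X,
                              LLT ShiftTy, uint64_t Log2C) {
  MatchInfo = [=](MachineIRBuilder &B) {
    auto Amt = B.buildConstant(ShiftTy, Log2C);
    B.buildShl(Dst, X, Amt);
  };
}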
std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:665
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1503
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition: Utils.cpp:201
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition: Utils.cpp:1433
std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:960
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
APInt getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
Given that VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:306
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition: Utils.cpp:447
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition: Utils.cpp:1625
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:433
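A minimal sketch of the usual constant-probing idiom built on the lookup above; the wrapper and the expected value are illustrative.

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// True if Reg resolves (looking through copies and similar) to a G_CONSTANT
// whose sign-extended value equals Expected.
static bool isConstantEqualTo(Register Reg, const MachineRegisterInfo &MRI,
                              int64_t Expected) {
  auto ValAndVReg = getIConstantVRegValWithLookThrough(Reg, MRI);
  return ValAndVReg && ValAndVReg->Value.getSExtValue() == Expected;
}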
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition: Utils.cpp:467
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:493
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1418
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:254
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:282
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition: Utils.h:228
Extended Value Type.
Definition: ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
Definition: KnownBits.cpp:488
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
static std::optional< bool > ne(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_NE result.
Definition: KnownBits.cpp:496
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:536
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:134
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:502
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:542
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:518
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:522
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:546
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:526
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:512
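A minimal sketch of folding an unsigned comparison from known bits alone, using the predicate helpers above; the helper name is illustrative.

#include "llvm/Support/KnownBits.h"
#include <optional>
using namespace llvm;

// Returns true/false when the known bits already decide LHS u< RHS, and
// std::nullopt when the comparison cannot be folded.
static std::optional<bool> foldUltFromKnownBits(const KnownBits &LHS,
                                                const KnownBits &RHS) {
  if (LHS.isUnknown() && RHS.isUnknown())
    return std::nullopt; // nothing known about either side
  return KnownBits::ult(LHS, RHS);
}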
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
MachineInstr * MI
const RegisterBank * Bank
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
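A minimal sketch of requesting the magic-number data that the udiv-by-constant lowering consumes; the divisor value and the helper name are illustrative.

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"
using namespace llvm;

// Compute the multiply/shift constants that implement an unsigned divide by 7
// on a 32-bit type.
static UnsignedDivisionByConstantInfo magicForUDiv7() {
  APInt D(32, 7);
  return UnsignedDivisionByConstantInfo::get(D);
}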