LLVM 19.0.0git
CombinerHelper.cpp
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
56CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
57 MachineIRBuilder &B, bool IsPreLegalize,
58 GISelKnownBits *KB, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI)
60 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
61 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
62 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
63 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
64 (void)this->KB;
65}
66
67const TargetLowering &CombinerHelper::getTargetLowering() const {
68 return *Builder.getMF().getSubtarget().getTargetLowering();
69}
70
71/// \returns The little endian in-memory byte position of byte \p I in a
72/// \p ByteWidth bytes wide type.
73///
74/// E.g. Given a 4-byte type x, x[0] -> byte 0
75static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
76 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
77 return I;
78}
79
80/// Determines the LogBase2 value for a non-null input value using the
81/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
82static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
83 auto &MRI = *MIB.getMRI();
84 LLT Ty = MRI.getType(V);
85 auto Ctlz = MIB.buildCTLZ(Ty, V);
86 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
87 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
88}
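// A worked instance of the transform above, assuming a 32-bit scalar:
// for V = 8, ctlz(8) = 28, so LogBase2 = (32 - 1) - 28 = 3 = log2(8).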
89
90/// \returns The big endian in-memory byte position of byte \p I in a
91/// \p ByteWidth bytes wide type.
92///
93/// E.g. Given a 4-byte type x, x[0] -> byte 3
94static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
95 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
96 return ByteWidth - I - 1;
97}
98
99/// Given a map from byte offsets in memory to indices in a load/store,
100/// determine if that map corresponds to a little or big endian byte pattern.
101///
102/// \param MemOffset2Idx maps memory offsets to address offsets.
103/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
104///
105/// \returns true if the map corresponds to a big endian byte pattern, false if
106/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
107///
108/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
109/// are as follows:
110///
111/// AddrOffset Little endian Big endian
112/// 0 0 3
113/// 1 1 2
114/// 2 2 1
115/// 3 3 0
116static std::optional<bool>
117isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
118 int64_t LowestIdx) {
119 // Need at least two byte positions to decide on endianness.
120 unsigned Width = MemOffset2Idx.size();
121 if (Width < 2)
122 return std::nullopt;
123 bool BigEndian = true, LittleEndian = true;
124 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
125 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
126 if (MemOffsetAndIdx == MemOffset2Idx.end())
127 return std::nullopt;
128 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
129 assert(Idx >= 0 && "Expected non-negative byte offset?");
130 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
131 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
132 if (!BigEndian && !LittleEndian)
133 return std::nullopt;
134 }
135
136 assert((BigEndian != LittleEndian) &&
137 "Pattern cannot be both big and little endian!");
138 return BigEndian;
139}
140
141bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
142
143bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
144 assert(LI && "Must have LegalizerInfo to query isLegal!");
145 return LI->getAction(Query).Action == LegalizeActions::Legal;
146}
147
148bool CombinerHelper::isLegalOrBeforeLegalizer(
149 const LegalityQuery &Query) const {
150 return isPreLegalize() || isLegal(Query);
151}
152
153bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
154 if (!Ty.isVector())
155 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
156 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
157 if (isPreLegalize())
158 return true;
159 LLT EltTy = Ty.getElementType();
160 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
161 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
162}
163
163
164void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
165 Register ToReg) const {
166 Observer.changingAllUsesOfReg(MRI, FromReg);
167
168 if (MRI.constrainRegAttrs(ToReg, FromReg))
169 MRI.replaceRegWith(FromReg, ToReg);
170 else
171 Builder.buildCopy(ToReg, FromReg);
172
173 Observer.finishedChangingAllUsesOfReg();
174}
175
176void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
177 MachineOperand &FromRegOp,
178 Register ToReg) const {
179 assert(FromRegOp.getParent() && "Expected an operand in an MI");
180 Observer.changingInstr(*FromRegOp.getParent());
181
182 FromRegOp.setReg(ToReg);
183
184 Observer.changedInstr(*FromRegOp.getParent());
185}
186
187void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
188 unsigned ToOpcode) const {
189 Observer.changingInstr(FromMI);
190
191 FromMI.setDesc(Builder.getTII().get(ToOpcode));
192
193 Observer.changedInstr(FromMI);
194}
195
196const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
197 return RBI->getRegBank(Reg, MRI, *TRI);
198}
199
200void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
201 if (RegBank)
202 MRI.setRegBank(Reg, *RegBank);
203}
204
205bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
206 if (matchCombineCopy(MI)) {
207 applyCombineCopy(MI);
208 return true;
209 }
210 return false;
211}
212bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
213 if (MI.getOpcode() != TargetOpcode::COPY)
214 return false;
215 Register DstReg = MI.getOperand(0).getReg();
216 Register SrcReg = MI.getOperand(1).getReg();
217 return canReplaceReg(DstReg, SrcReg, MRI);
218}
219void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
220 Register DstReg = MI.getOperand(0).getReg();
221 Register SrcReg = MI.getOperand(1).getReg();
222 MI.eraseFromParent();
223 replaceRegWith(MRI, DstReg, SrcReg);
224}
225
226bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
227 MachineInstr &MI, BuildFnTy &MatchInfo) {
228 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
229 Register DstOp = MI.getOperand(0).getReg();
230 Register OrigOp = MI.getOperand(1).getReg();
231
232 if (!MRI.hasOneNonDBGUse(OrigOp))
233 return false;
234
235 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
236 // Even if only a single operand of the PHI is not guaranteed non-poison,
237 // moving freeze() backwards across a PHI can cause optimization issues for
238 // other users of that operand.
239 //
240 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
241 // the source register is unprofitable because it makes the freeze() more
242 // strict than is necessary (it would affect the whole register instead of
243 // just the subreg being frozen).
244 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
245 return false;
246
247 if (canCreateUndefOrPoison(OrigOp, MRI,
248 /*ConsiderFlagsAndMetadata=*/false))
249 return false;
250
251 std::optional<MachineOperand> MaybePoisonOperand;
252 for (MachineOperand &Operand : OrigDef->uses()) {
253 if (!Operand.isReg())
254 return false;
255
256 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
257 continue;
258
259 if (!MaybePoisonOperand)
260 MaybePoisonOperand = Operand;
261 else {
262 // We have more than one maybe-poison operand. Moving the freeze is
263 // unsafe.
264 return false;
265 }
266 }
267
268 // Eliminate freeze if all operands are guaranteed non-poison.
269 if (!MaybePoisonOperand) {
270 MatchInfo = [=](MachineIRBuilder &B) {
271 Observer.changingInstr(*OrigDef);
272 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
273 Observer.changedInstr(*OrigDef);
274 B.buildCopy(DstOp, OrigOp);
275 };
276 return true;
277 }
278
279 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
280 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
281
282 MatchInfo = [=](MachineIRBuilder &B) mutable {
283 Observer.changingInstr(*OrigDef);
284 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
285 Observer.changedInstr(*OrigDef);
286 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
287 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
288 replaceRegOpWith(
289 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
290 Freeze.getReg(0));
291 replaceRegWith(MRI, DstOp, OrigOp);
292 };
293 return true;
294}
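// A sketch of the rewrite this match enables; the register names below are
// illustrative, assuming %a is known non-poison:
//   %x:_(s32) = G_ADD %a, %maybe_poison
//   %y:_(s32) = G_FREEZE %x
// becomes a freeze of the single maybe-poison operand, with the
// poison-generating flags on %x dropped and uses of %y replaced by %x:
//   %f:_(s32) = G_FREEZE %maybe_poison
//   %x:_(s32) = G_ADD %a, %f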
295
296bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
297 SmallVector<Register> &Ops) {
298 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
299 "Invalid instruction");
300 bool IsUndef = true;
301 MachineInstr *Undef = nullptr;
302
303 // Walk over all the operands of concat vectors and check if they are
304 // build_vector themselves or undef.
305 // Then collect their operands in Ops.
306 for (const MachineOperand &MO : MI.uses()) {
307 Register Reg = MO.getReg();
308 MachineInstr *Def = MRI.getVRegDef(Reg);
309 assert(Def && "Operand not defined");
310 if (!MRI.hasOneNonDBGUse(Reg))
311 return false;
312 switch (Def->getOpcode()) {
313 case TargetOpcode::G_BUILD_VECTOR:
314 IsUndef = false;
315 // Remember the operands of the build_vector to fold
316 // them into the yet-to-build flattened concat vectors.
317 for (const MachineOperand &BuildVecMO : Def->uses())
318 Ops.push_back(BuildVecMO.getReg());
319 break;
320 case TargetOpcode::G_IMPLICIT_DEF: {
321 LLT OpType = MRI.getType(Reg);
322 // Keep one undef value for all the undef operands.
323 if (!Undef) {
324 Builder.setInsertPt(*MI.getParent(), MI);
325 Undef = Builder.buildUndef(OpType.getScalarType());
326 }
327 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
328 OpType.getScalarType() &&
329 "All undefs should have the same type");
330 // Break the undef vector in as many scalar elements as needed
331 // for the flattening.
332 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
333 EltIdx != EltEnd; ++EltIdx)
334 Ops.push_back(Undef->getOperand(0).getReg());
335 break;
336 }
337 default:
338 return false;
339 }
340 }
341
342 // Check if the combine is illegal
343 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
344 if (!isLegalOrBeforeLegalizer(
345 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
346 return false;
347 }
348
349 if (IsUndef)
350 Ops.clear();
351
352 return true;
353}
354void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
355 SmallVector<Register> &Ops) {
356 // We determined that the concat_vectors can be flattened.
357 // Generate the flattened build_vector.
358 Register DstReg = MI.getOperand(0).getReg();
359 Builder.setInsertPt(*MI.getParent(), MI);
360 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
361
362 // Note: IsUndef is sort of redundant. We could have determined it by
363 // checking that all Ops are undef. Alternatively, we could have
364 // generated a build_vector of undefs and relied on another combine to
365 // clean that up. For now, given we already gather this information
366 // in matchCombineConcatVectors, just save compile time and issue the
367 // right thing.
368 if (Ops.empty())
369 Builder.buildUndef(NewDstReg);
370 else
371 Builder.buildBuildVector(NewDstReg, Ops);
372 MI.eraseFromParent();
373 replaceRegWith(MRI, DstReg, NewDstReg);
374}
375
376bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
377 SmallVector<Register> &Ops) {
378 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
379 auto ConcatMI1 =
380 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
381 auto ConcatMI2 =
382 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
383 if (!ConcatMI1 || !ConcatMI2)
384 return false;
385
386 // Check that the sources of the Concat instructions have the same type
387 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
388 MRI.getType(ConcatMI2->getSourceReg(0)))
389 return false;
390
391 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
392 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
393 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
394 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
395 // Check if the index takes a whole source register from G_CONCAT_VECTORS
396 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
397 if (Mask[i] == -1) {
398 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
399 if (i + j >= Mask.size())
400 return false;
401 if (Mask[i + j] != -1)
402 return false;
403 }
404 if (!isLegalOrBeforeLegalizer(
405 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
406 return false;
407 Ops.push_back(0);
408 } else if (Mask[i] % ConcatSrcNumElt == 0) {
409 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
410 if (i + j >= Mask.size())
411 return false;
412 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
413 return false;
414 }
415 // Retrieve the source register from its respective G_CONCAT_VECTORS
416 // instruction
417 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
418 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
419 } else {
420 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
421 ConcatMI1->getNumSources()));
422 }
423 } else {
424 return false;
425 }
426 }
427
428 if (!isLegalOrBeforeLegalizer(
429 {TargetOpcode::G_CONCAT_VECTORS,
430 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
431 return false;
432
433 return !Ops.empty();
434}
435
436void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI,
437 SmallVector<Register> &Ops) {
438 LLT SrcTy = MRI.getType(Ops[0]);
439 Register UndefReg = 0;
440
441 for (Register &Reg : Ops) {
442 if (Reg == 0) {
443 if (UndefReg == 0)
444 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
445 Reg = UndefReg;
446 }
447 }
448
449 if (Ops.size() > 1)
450 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
451 else
452 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
453 MI.eraseFromParent();
454}
455
456bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
457 SmallVector<Register, 4> Ops;
458 if (matchCombineShuffleVector(MI, Ops)) {
459 applyCombineShuffleVector(MI, Ops);
460 return true;
461 }
462 return false;
463}
464
465bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
466 SmallVectorImpl<Register> &Ops) {
467 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
468 "Invalid instruction kind");
469 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
470 Register Src1 = MI.getOperand(1).getReg();
471 LLT SrcType = MRI.getType(Src1);
472 // As bizarre as it may look, shuffle vector can actually produce
473 // scalar! This is because at the IR level a <1 x ty> shuffle
474 // vector is perfectly valid.
475 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
476 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
477
478 // If the resulting vector is smaller than the size of the source
479 // vectors being concatenated, we won't be able to replace the
480 // shuffle vector into a concat_vectors.
481 //
482 // Note: We may still be able to produce a concat_vectors fed by
483 // extract_vector_elt and so on. It is less clear that would
484 // be better though, so don't bother for now.
485 //
486 // If the destination is a scalar, the size of the sources doesn't
487 // matter. We will lower the shuffle to a plain copy. This will
488 // work only if the source and destination have the same size. But
489 // that's covered by the next condition.
490 //
491 // TODO: If the sizes of the source and destination don't match,
492 // we could still emit an extract vector element in that case.
493 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
494 return false;
495
496 // Check that the shuffle mask can be broken evenly between the
497 // different sources.
498 if (DstNumElts % SrcNumElts != 0)
499 return false;
500
501 // Mask length is a multiple of the source vector length.
502 // Check if the shuffle is some kind of concatenation of the input
503 // vectors.
504 unsigned NumConcat = DstNumElts / SrcNumElts;
505 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
506 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
507 for (unsigned i = 0; i != DstNumElts; ++i) {
508 int Idx = Mask[i];
509 // Undef value.
510 if (Idx < 0)
511 continue;
512 // Ensure the indices in each SrcType sized piece are sequential and that
513 // the same source is used for the whole piece.
514 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
515 (ConcatSrcs[i / SrcNumElts] >= 0 &&
516 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
517 return false;
518 // Remember which source this index came from.
519 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
520 }
521
522 // The shuffle is concatenating multiple vectors together.
523 // Collect the different operands for that.
524 Register UndefReg;
525 Register Src2 = MI.getOperand(2).getReg();
526 for (auto Src : ConcatSrcs) {
527 if (Src < 0) {
528 if (!UndefReg) {
529 Builder.setInsertPt(*MI.getParent(), MI);
530 UndefReg = Builder.buildUndef(SrcType).getReg(0);
531 }
532 Ops.push_back(UndefReg);
533 } else if (Src == 0)
534 Ops.push_back(Src1);
535 else
536 Ops.push_back(Src2);
537 }
538 return true;
539}
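// As a concrete illustration (register names assumed for the example): for
//   %d:_(<8 x s32>) = G_SHUFFLE_VECTOR %a(<4 x s32>), %b(<4 x s32>),
//                       shufflemask(0, 1, 2, 3, 4, 5, 6, 7)
// the mask splits into two sequential pieces, one per source, so Ops becomes
// {%a, %b} and the shuffle is rebuilt as a concat-like merge of %a and %b.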
540
541void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
542 const ArrayRef<Register> Ops) {
543 Register DstReg = MI.getOperand(0).getReg();
544 Builder.setInsertPt(*MI.getParent(), MI);
545 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
546
547 if (Ops.size() == 1)
548 Builder.buildCopy(NewDstReg, Ops[0]);
549 else
550 Builder.buildMergeLikeInstr(NewDstReg, Ops);
551
552 MI.eraseFromParent();
553 replaceRegWith(MRI, DstReg, NewDstReg);
554}
555
556bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
557 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
558 "Invalid instruction kind");
559
560 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
561 return Mask.size() == 1;
562}
563
564void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
565 Register DstReg = MI.getOperand(0).getReg();
566 Builder.setInsertPt(*MI.getParent(), MI);
567
568 int I = MI.getOperand(3).getShuffleMask()[0];
569 Register Src1 = MI.getOperand(1).getReg();
570 LLT Src1Ty = MRI.getType(Src1);
571 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
572 Register SrcReg;
573 if (I >= Src1NumElts) {
574 SrcReg = MI.getOperand(2).getReg();
575 I -= Src1NumElts;
576 } else if (I >= 0)
577 SrcReg = Src1;
578
579 if (I < 0)
580 Builder.buildUndef(DstReg);
581 else if (!MRI.getType(SrcReg).isVector())
582 Builder.buildCopy(DstReg, SrcReg);
583 else
585
586 MI.eraseFromParent();
587}
588
589namespace {
590
591/// Select a preference between two uses. CurrentUse is the current preference
592/// while *ForCandidate is attributes of the candidate under consideration.
593PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
594 PreferredTuple &CurrentUse,
595 const LLT TyForCandidate,
596 unsigned OpcodeForCandidate,
597 MachineInstr *MIForCandidate) {
598 if (!CurrentUse.Ty.isValid()) {
599 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
600 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
601 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
602 return CurrentUse;
603 }
604
605 // We permit the extend to hoist through basic blocks but this is only
606 // sensible if the target has extending loads. If you end up lowering back
607 // into a load and extend during the legalizer then the end result is
608 // hoisting the extend up to the load.
609
610 // Prefer defined extensions to undefined extensions as these are more
611 // likely to reduce the number of instructions.
612 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
613 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
614 return CurrentUse;
615 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
616 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
617 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
618
619 // Prefer sign extensions to zero extensions as sign-extensions tend to be
620 // more expensive. Don't do this if the load is already a zero-extend load
621 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
622 // later.
623 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
624 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
625 OpcodeForCandidate == TargetOpcode::G_ZEXT)
626 return CurrentUse;
627 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
628 OpcodeForCandidate == TargetOpcode::G_SEXT)
629 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
630 }
631
632 // This is potentially target specific. We've chosen the largest type
633 // because G_TRUNC is usually free. One potential catch with this is that
634 // some targets have a reduced number of larger registers than smaller
635 // registers and this choice potentially increases the live-range for the
636 // larger value.
637 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
638 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
639 }
640 return CurrentUse;
641}
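// For example, given a plain G_LOAD with one s32 G_ZEXT use and one s32
// G_SEXT use, ChoosePreferredUse keeps the G_SEXT candidate; an s32 G_SEXT
// is likewise preferred over an s64 G_ANYEXT, since the defined/undefined
// extension check is applied before the larger-type tie-break.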
642
643/// Find a suitable place to insert some instructions and insert them. This
644/// function accounts for special cases like inserting before a PHI node.
645/// The current strategy for inserting before PHI's is to duplicate the
646/// instructions for each predecessor. However, while that's ok for G_TRUNC
647/// on most targets since it generally requires no code, other targets/cases may
648/// want to try harder to find a dominating block.
649static void InsertInsnsWithoutSideEffectsBeforeUse(
650 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
651 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
652 MachineOperand &UseMO)>
653 Inserter) {
654 MachineInstr &UseMI = *UseMO.getParent();
655
656 MachineBasicBlock *InsertBB = UseMI.getParent();
657
658 // If the use is a PHI then we want the predecessor block instead.
659 if (UseMI.isPHI()) {
660 MachineOperand *PredBB = std::next(&UseMO);
661 InsertBB = PredBB->getMBB();
662 }
663
664 // If the block is the same block as the def then we want to insert just after
665 // the def instead of at the start of the block.
666 if (InsertBB == DefMI.getParent()) {
667 MachineBasicBlock::iterator InsertPt = &DefMI;
668 Inserter(InsertBB, std::next(InsertPt), UseMO);
669 return;
670 }
671
672 // Otherwise we want the start of the BB
673 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
674}
675} // end anonymous namespace
676
677bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
678 PreferredTuple Preferred;
679 if (matchCombineExtendingLoads(MI, Preferred)) {
680 applyCombineExtendingLoads(MI, Preferred);
681 return true;
682 }
683 return false;
684}
685
686static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
687 unsigned CandidateLoadOpc;
688 switch (ExtOpc) {
689 case TargetOpcode::G_ANYEXT:
690 CandidateLoadOpc = TargetOpcode::G_LOAD;
691 break;
692 case TargetOpcode::G_SEXT:
693 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
694 break;
695 case TargetOpcode::G_ZEXT:
696 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
697 break;
698 default:
699 llvm_unreachable("Unexpected extend opc");
700 }
701 return CandidateLoadOpc;
702}
703
704bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
705 PreferredTuple &Preferred) {
706 // We match the loads and follow the uses to the extend instead of matching
707 // the extends and following the def to the load. This is because the load
708 // must remain in the same position for correctness (unless we also add code
709 // to find a safe place to sink it) whereas the extend is freely movable.
710 // It also prevents us from duplicating the load for the volatile case or just
711 // for performance.
712 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
713 if (!LoadMI)
714 return false;
715
716 Register LoadReg = LoadMI->getDstReg();
717
718 LLT LoadValueTy = MRI.getType(LoadReg);
719 if (!LoadValueTy.isScalar())
720 return false;
721
722 // Most architectures are going to legalize <s8 loads into at least a 1 byte
723 // load, and the MMOs can only describe memory accesses in multiples of bytes.
724 // If we try to perform extload combining on those, we can end up with
725 // %a(s8) = extload %ptr (load 1 byte from %ptr)
726 // ... which is an illegal extload instruction.
727 if (LoadValueTy.getSizeInBits() < 8)
728 return false;
729
730 // For non power-of-2 types, they will very likely be legalized into multiple
731 // loads. Don't bother trying to match them into extending loads.
732 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
733 return false;
734
735 // Find the preferred type aside from the any-extends (unless it's the only
736 // one) and non-extending ops. We'll emit an extending load to that type
737 // and emit a variant of (extend (trunc X)) for the others according to the
738 // relative type sizes. At the same time, pick an extend to use based on the
739 // extend involved in the chosen type.
740 unsigned PreferredOpcode =
741 isa<GLoad>(&MI)
742 ? TargetOpcode::G_ANYEXT
743 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
744 Preferred = {LLT(), PreferredOpcode, nullptr};
745 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
746 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
747 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
748 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
749 const auto &MMO = LoadMI->getMMO();
750 // Don't do anything for atomics.
751 if (MMO.isAtomic())
752 continue;
753 // Check for legality.
754 if (!isPreLegalize()) {
755 LegalityQuery::MemDesc MMDesc(MMO);
756 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
757 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
758 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
759 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
760 .Action != LegalizeActions::Legal)
761 continue;
762 }
763 Preferred = ChoosePreferredUse(MI, Preferred,
764 MRI.getType(UseMI.getOperand(0).getReg()),
765 UseMI.getOpcode(), &UseMI);
766 }
767 }
768
769 // There were no extends
770 if (!Preferred.MI)
771 return false;
772 // It should be impossible to choose an extend without selecting a different
773 // type since by definition the result of an extend is larger.
774 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
775
776 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
777 return true;
778}
779
780void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
781 PreferredTuple &Preferred) {
782 // Rewrite the load to the chosen extending load.
783 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
784
785 // Inserter to insert a truncate back to the original type at a given point
786 // with some basic CSE to limit truncate duplication to one per BB.
787 DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
788 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
789 MachineBasicBlock::iterator InsertBefore,
790 MachineOperand &UseMO) {
791 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
792 if (PreviouslyEmitted) {
793 Observer.changingInstr(*UseMO.getParent());
794 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
795 Observer.changedInstr(*UseMO.getParent());
796 return;
797 }
798
799 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
800 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
801 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
802 EmittedInsns[InsertIntoBB] = NewMI;
803 replaceRegOpWith(MRI, UseMO, NewDstReg);
804 };
805
806 Observer.changingInstr(MI);
807 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
808 MI.setDesc(Builder.getTII().get(LoadOpc));
809
810 // Rewrite all the uses to fix up the types.
811 auto &LoadValue = MI.getOperand(0);
812 SmallVector<MachineOperand *, 4> Uses;
813 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
814 Uses.push_back(&UseMO);
815
816 for (auto *UseMO : Uses) {
817 MachineInstr *UseMI = UseMO->getParent();
818
819 // If the extend is compatible with the preferred extend then we should fix
820 // up the type and extend so that it uses the preferred use.
821 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
822 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
823 Register UseDstReg = UseMI->getOperand(0).getReg();
824 MachineOperand &UseSrcMO = UseMI->getOperand(1);
825 const LLT UseDstTy = MRI.getType(UseDstReg);
826 if (UseDstReg != ChosenDstReg) {
827 if (Preferred.Ty == UseDstTy) {
828 // If the use has the same type as the preferred use, then merge
829 // the vregs and erase the extend. For example:
830 // %1:_(s8) = G_LOAD ...
831 // %2:_(s32) = G_SEXT %1(s8)
832 // %3:_(s32) = G_ANYEXT %1(s8)
833 // ... = ... %3(s32)
834 // rewrites to:
835 // %2:_(s32) = G_SEXTLOAD ...
836 // ... = ... %2(s32)
837 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
838 Observer.erasingInstr(*UseMO->getParent());
839 UseMO->getParent()->eraseFromParent();
840 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
841 // If the preferred size is smaller, then keep the extend but extend
842 // from the result of the extending load. For example:
843 // %1:_(s8) = G_LOAD ...
844 // %2:_(s32) = G_SEXT %1(s8)
845 // %3:_(s64) = G_ANYEXT %1(s8)
846 // ... = ... %3(s64)
847 /// rewrites to:
848 // %2:_(s32) = G_SEXTLOAD ...
849 // %3:_(s64) = G_ANYEXT %2:_(s32)
850 // ... = ... %3(s64)
851 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
852 } else {
853 // If the preferred size is large, then insert a truncate. For
854 // example:
855 // %1:_(s8) = G_LOAD ...
856 // %2:_(s64) = G_SEXT %1(s8)
857 // %3:_(s32) = G_ZEXT %1(s8)
858 // ... = ... %3(s32)
859 /// rewrites to:
860 // %2:_(s64) = G_SEXTLOAD ...
861 // %4:_(s8) = G_TRUNC %2:_(s32)
862 // %3:_(s64) = G_ZEXT %2:_(s8)
863 // ... = ... %3(s64)
864 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
865 InsertTruncAt);
866 }
867 continue;
868 }
869 // The use is (one of) the uses of the preferred use we chose earlier.
870 // We're going to update the load to def this value later so just erase
871 // the old extend.
872 Observer.erasingInstr(*UseMO->getParent());
873 UseMO->getParent()->eraseFromParent();
874 continue;
875 }
876
877 // The use isn't an extend. Truncate back to the type we originally loaded.
878 // This is free on many targets.
879 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
880 }
881
882 MI.getOperand(0).setReg(ChosenDstReg);
883 Observer.changedInstr(MI);
884}
885
886bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
887 BuildFnTy &MatchInfo) {
888 assert(MI.getOpcode() == TargetOpcode::G_AND);
889
890 // If we have the following code:
891 // %mask = G_CONSTANT 255
892 // %ld = G_LOAD %ptr, (load s16)
893 // %and = G_AND %ld, %mask
894 //
895 // Try to fold it into
896 // %ld = G_ZEXTLOAD %ptr, (load s8)
897
898 Register Dst = MI.getOperand(0).getReg();
899 if (MRI.getType(Dst).isVector())
900 return false;
901
902 auto MaybeMask =
903 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
904 if (!MaybeMask)
905 return false;
906
907 APInt MaskVal = MaybeMask->Value;
908
909 if (!MaskVal.isMask())
910 return false;
911
912 Register SrcReg = MI.getOperand(1).getReg();
913 // Don't use getOpcodeDef() here since intermediate instructions may have
914 // multiple users.
915 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
916 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
917 return false;
918
919 Register LoadReg = LoadMI->getDstReg();
920 LLT RegTy = MRI.getType(LoadReg);
921 Register PtrReg = LoadMI->getPointerReg();
922 unsigned RegSize = RegTy.getSizeInBits();
923 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
924 unsigned MaskSizeBits = MaskVal.countr_one();
925
926 // The mask may not be larger than the in-memory type, as it might cover sign
927 // extended bits
928 if (MaskSizeBits > LoadSizeBits.getValue())
929 return false;
930
931 // If the mask covers the whole destination register, there's nothing to
932 // extend
933 if (MaskSizeBits >= RegSize)
934 return false;
935
936 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
937 // at least byte loads. Avoid creating such loads here
938 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
939 return false;
940
941 const MachineMemOperand &MMO = LoadMI->getMMO();
942 LegalityQuery::MemDesc MemDesc(MMO);
943
944 // Don't modify the memory access size if this is atomic/volatile, but we can
945 // still adjust the opcode to indicate the high bit behavior.
946 if (LoadMI->isSimple())
947 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
948 else if (LoadSizeBits.getValue() > MaskSizeBits ||
949 LoadSizeBits.getValue() == RegSize)
950 return false;
951
952 // TODO: Could check if it's legal with the reduced or original memory size.
953 if (!isLegalOrBeforeLegalizer(
954 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
955 return false;
956
957 MatchInfo = [=](MachineIRBuilder &B) {
958 B.setInstrAndDebugLoc(*LoadMI);
959 auto &MF = B.getMF();
960 auto PtrInfo = MMO.getPointerInfo();
961 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
962 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
963 LoadMI->eraseFromParent();
964 };
965 return true;
966}
967
968static bool isPredecessor(const MachineInstr &DefMI,
969 const MachineInstr &UseMI) {
970 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
971 "shouldn't consider debug uses");
972 assert(DefMI.getParent() == UseMI.getParent());
973 if (&DefMI == &UseMI)
974 return true;
975 const MachineBasicBlock &MBB = *DefMI.getParent();
976 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
977 return &MI == &DefMI || &MI == &UseMI;
978 });
979 if (DefOrUse == MBB.end())
980 llvm_unreachable("Block must contain both DefMI and UseMI!");
981 return &*DefOrUse == &DefMI;
982}
983
984bool CombinerHelper::dominates(const MachineInstr &DefMI,
985 const MachineInstr &UseMI) {
986 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
987 "shouldn't consider debug uses");
988 if (MDT)
989 return MDT->dominates(&DefMI, &UseMI);
990 else if (DefMI.getParent() != UseMI.getParent())
991 return false;
992
993 return isPredecessor(DefMI, UseMI);
994}
995
996bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
997 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
998 Register SrcReg = MI.getOperand(1).getReg();
999 Register LoadUser = SrcReg;
1000
1001 if (MRI.getType(SrcReg).isVector())
1002 return false;
1003
1004 Register TruncSrc;
1005 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1006 LoadUser = TruncSrc;
1007
1008 uint64_t SizeInBits = MI.getOperand(2).getImm();
1009 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1010 // need any extend at all, just a truncate.
1011 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1012 // If truncating more than the original extended value, abort.
1013 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1014 if (TruncSrc &&
1015 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1016 return false;
1017 if (LoadSizeBits == SizeInBits)
1018 return true;
1019 }
1020 return false;
1021}
1022
1023void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
1024 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1025 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1026 MI.eraseFromParent();
1027}
1028
1029bool CombinerHelper::matchSextInRegOfLoad(
1030 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1031 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1032
1033 Register DstReg = MI.getOperand(0).getReg();
1034 LLT RegTy = MRI.getType(DstReg);
1035
1036 // Only supports scalars for now.
1037 if (RegTy.isVector())
1038 return false;
1039
1040 Register SrcReg = MI.getOperand(1).getReg();
1041 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1042 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
1043 return false;
1044
1045 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1046
1047 // If the sign extend extends from a narrower width than the load's width,
1048 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1049 // Avoid widening the load at all.
1050 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1051
1052 // Don't generate G_SEXTLOADs with a < 1 byte width.
1053 if (NewSizeBits < 8)
1054 return false;
1055 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1056 // anyway for most targets.
1057 if (!isPowerOf2_32(NewSizeBits))
1058 return false;
1059
1060 const MachineMemOperand &MMO = LoadDef->getMMO();
1061 LegalityQuery::MemDesc MMDesc(MMO);
1062
1063 // Don't modify the memory access size if this is atomic/volatile, but we can
1064 // still adjust the opcode to indicate the high bit behavior.
1065 if (LoadDef->isSimple())
1066 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1067 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1068 return false;
1069
1070 // TODO: Could check if it's legal with the reduced or original memory size.
1071 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1072 {MRI.getType(LoadDef->getDstReg()),
1073 MRI.getType(LoadDef->getPointerReg())},
1074 {MMDesc}}))
1075 return false;
1076
1077 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1078 return true;
1079}
1080
1081void CombinerHelper::applySextInRegOfLoad(
1082 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1083 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1084 Register LoadReg;
1085 unsigned ScalarSizeBits;
1086 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1087 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1088
1089 // If we have the following:
1090 // %ld = G_LOAD %ptr, (load 2)
1091 // %ext = G_SEXT_INREG %ld, 8
1092 // ==>
1093 // %ld = G_SEXTLOAD %ptr (load 1)
1094
1095 auto &MMO = LoadDef->getMMO();
1096 Builder.setInstrAndDebugLoc(*LoadDef);
1097 auto &MF = Builder.getMF();
1098 auto PtrInfo = MMO.getPointerInfo();
1099 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1100 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1101 LoadDef->getPointerReg(), *NewMMO);
1102 MI.eraseFromParent();
1103}
1104
1105/// Return true if 'MI' is a load or a store that may fold its address
1106/// operand into the load / store addressing mode.
1107static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
1108 MachineRegisterInfo &MRI) {
1109 TargetLoweringBase::AddrMode AM;
1110 auto *MF = MI->getMF();
1111 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1112 if (!Addr)
1113 return false;
1114
1115 AM.HasBaseReg = true;
1116 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1117 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1118 else
1119 AM.Scale = 1; // [reg +/- reg]
1120
1121 return TLI.isLegalAddressingMode(
1122 MF->getDataLayout(), AM,
1123 getTypeForLLT(MI->getMMO().getMemoryType(),
1124 MF->getFunction().getContext()),
1125 MI->getMMO().getAddrSpace());
1126}
1127
1128static unsigned getIndexedOpc(unsigned LdStOpc) {
1129 switch (LdStOpc) {
1130 case TargetOpcode::G_LOAD:
1131 return TargetOpcode::G_INDEXED_LOAD;
1132 case TargetOpcode::G_STORE:
1133 return TargetOpcode::G_INDEXED_STORE;
1134 case TargetOpcode::G_ZEXTLOAD:
1135 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1136 case TargetOpcode::G_SEXTLOAD:
1137 return TargetOpcode::G_INDEXED_SEXTLOAD;
1138 default:
1139 llvm_unreachable("Unexpected opcode");
1140 }
1141}
1142
1143bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1144 // Check for legality.
1145 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1146 LLT Ty = MRI.getType(LdSt.getReg(0));
1147 LLT MemTy = LdSt.getMMO().getMemoryType();
1148 SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
1149 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1150 AtomicOrdering::NotAtomic}});
1151 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1152 SmallVector<LLT> OpTys;
1153 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1154 OpTys = {PtrTy, Ty, Ty};
1155 else
1156 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1157
1158 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1159 return isLegal(Q);
1160}
1161
1163 "post-index-use-threshold", cl::Hidden, cl::init(32),
1164 cl::desc("Number of uses of a base pointer to check before it is no longer "
1165 "considered for post-indexing."));
1166
1167bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1168 Register &Base, Register &Offset,
1169 bool &RematOffset) {
1170 // We're looking for the following pattern, for either load or store:
1171 // %baseptr:_(p0) = ...
1172 // G_STORE %val(s64), %baseptr(p0)
1173 // %offset:_(s64) = G_CONSTANT i64 -256
1174 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1175 const auto &TLI = getTargetLowering();
1176
1177 Register Ptr = LdSt.getPointerReg();
1178 // If the store is the only use, don't bother.
1179 if (MRI.hasOneNonDBGUse(Ptr))
1180 return false;
1181
1182 if (!isIndexedLoadStoreLegal(LdSt))
1183 return false;
1184
1185 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1186 return false;
1187
1188 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1189 auto *PtrDef = MRI.getVRegDef(Ptr);
1190
1191 unsigned NumUsesChecked = 0;
1192 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1193 if (++NumUsesChecked > PostIndexUseThreshold)
1194 return false; // Try to avoid exploding compile time.
1195
1196 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1197 // The use itself might be dead. This can happen during combines if DCE
1198 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1199 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1200 continue;
1201
1202 // Check the user of this isn't the store, otherwise we'd be generating an
1203 // indexed store defining its own use.
1204 if (StoredValDef == &Use)
1205 continue;
1206
1207 Offset = PtrAdd->getOffsetReg();
1208 if (!ForceLegalIndexing &&
1209 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1210 /*IsPre*/ false, MRI))
1211 continue;
1212
1213 // Make sure the offset calculation is before the potentially indexed op.
1214 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1215 RematOffset = false;
1216 if (!dominates(*OffsetDef, LdSt)) {
1217 // If the offset however is just a G_CONSTANT, we can always just
1218 // rematerialize it where we need it.
1219 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1220 continue;
1221 RematOffset = true;
1222 }
1223
1224 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1225 if (&BasePtrUse == PtrDef)
1226 continue;
1227
1228 // If the user is a later load/store that can be post-indexed, then don't
1229 // combine this one.
1230 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1231 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1232 dominates(LdSt, *BasePtrLdSt) &&
1233 isIndexedLoadStoreLegal(*BasePtrLdSt))
1234 return false;
1235
1236 // Now we're looking for the key G_PTR_ADD instruction, which contains
1237 // the offset add that we want to fold.
1238 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1239 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1240 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1241 // If the use is in a different block, then we may produce worse code
1242 // due to the extra register pressure.
1243 if (BaseUseUse.getParent() != LdSt.getParent())
1244 return false;
1245
1246 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1247 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1248 return false;
1249 }
1250 if (!dominates(LdSt, BasePtrUse))
1251 return false; // All uses must be dominated by the load/store.
1252 }
1253 }
1254
1255 Addr = PtrAdd->getReg(0);
1256 Base = PtrAdd->getBaseReg();
1257 return true;
1258 }
1259
1260 return false;
1261}
1262
1263bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1264 Register &Base, Register &Offset) {
1265 auto &MF = *LdSt.getParent()->getParent();
1266 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1267
1268 Addr = LdSt.getPointerReg();
1269 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1270 MRI.hasOneNonDBGUse(Addr))
1271 return false;
1272
1273 if (!ForceLegalIndexing &&
1274 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1275 return false;
1276
1277 if (!isIndexedLoadStoreLegal(LdSt))
1278 return false;
1279
1280 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1281 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1282 return false;
1283
1284 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1285 // Would require a copy.
1286 if (Base == St->getValueReg())
1287 return false;
1288
1289 // We're expecting one use of Addr in MI, but it could also be the
1290 // value stored, which isn't actually dominated by the instruction.
1291 if (St->getValueReg() == Addr)
1292 return false;
1293 }
1294
1295 // Avoid increasing cross-block register pressure.
1296 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1297 if (AddrUse.getParent() != LdSt.getParent())
1298 return false;
1299
1300 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1301 // That might allow us to end base's liveness here by adjusting the constant.
1302 bool RealUse = false;
1303 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1304 if (!dominates(LdSt, AddrUse))
1305 return false; // All uses must be dominated by the load/store.
1306
1307 // If Ptr may be folded in addressing mode of other use, then it's
1308 // not profitable to do this transformation.
1309 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1310 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1311 RealUse = true;
1312 } else {
1313 RealUse = true;
1314 }
1315 }
1316 return RealUse;
1317}
1318
1319bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
1320 BuildFnTy &MatchInfo) {
1321 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1322
1323 // Check if there is a load that defines the vector being extracted from.
1324 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1325 if (!LoadMI)
1326 return false;
1327
1328 Register Vector = MI.getOperand(1).getReg();
1329 LLT VecEltTy = MRI.getType(Vector).getElementType();
1330
1331 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1332
1333 // Checking whether we should reduce the load width.
1334 if (!MRI.hasOneNonDBGUse(Vector))
1335 return false;
1336
1337 // Check if the defining load is simple.
1338 if (!LoadMI->isSimple())
1339 return false;
1340
1341 // If the vector element type is not a multiple of a byte then we are unable
1342 // to correctly compute an address to load only the extracted element as a
1343 // scalar.
1344 if (!VecEltTy.isByteSized())
1345 return false;
1346
1347 // Check for load fold barriers between the extraction and the load.
1348 if (MI.getParent() != LoadMI->getParent())
1349 return false;
1350 const unsigned MaxIter = 20;
1351 unsigned Iter = 0;
1352 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1353 if (II->isLoadFoldBarrier())
1354 return false;
1355 if (Iter++ == MaxIter)
1356 return false;
1357 }
1358
1359 // Check if the new load that we are going to create is legal
1360 // if we are in the post-legalization phase.
1361 MachineMemOperand MMO = LoadMI->getMMO();
1362 Align Alignment = MMO.getAlign();
1363 MachinePointerInfo PtrInfo;
1364 int64_t Offset;
1365
1366 // Finding the appropriate PtrInfo if offset is a known constant.
1367 // This is required to create the memory operand for the narrowed load.
1368 // This machine memory operand object helps us infer about legality
1369 // before we proceed to combine the instruction.
1370 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1371 int Elt = CVal->getZExtValue();
1372 // FIXME: should be (ABI size)*Elt.
1373 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1374 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1375 } else {
1376 // Discard the pointer info except the address space because the memory
1377 // operand can't represent this new access since the offset is variable.
1378 Offset = VecEltTy.getSizeInBits() / 8;
1379 PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
1380 }
1381
1382 Alignment = commonAlignment(Alignment, Offset);
1383
1384 Register VecPtr = LoadMI->getPointerReg();
1385 LLT PtrTy = MRI.getType(VecPtr);
1386
1387 MachineFunction &MF = *MI.getMF();
1388 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1389
1390 LegalityQuery::MemDesc MMDesc(*NewMMO);
1391
1392 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1393
1394 if (!isLegalOrBeforeLegalizer(Q))
1395 return false;
1396
1397 // Load must be allowed and fast on the target.
1398 LLVMContext &C = MF.getFunction().getContext();
1399 auto &DL = MF.getDataLayout();
1400 unsigned Fast = 0;
1401 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1402 &Fast) ||
1403 !Fast)
1404 return false;
1405
1406 Register Result = MI.getOperand(0).getReg();
1407 Register Index = MI.getOperand(2).getReg();
1408
1409 MatchInfo = [=](MachineIRBuilder &B) {
1410 GISelObserverWrapper DummyObserver;
1411 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1412 //// Get pointer to the vector element.
1413 Register finalPtr = Helper.getVectorElementPointer(
1414 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1415 Index);
1416 // New G_LOAD instruction.
1417 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1418 // Remove original GLOAD instruction.
1419 LoadMI->eraseFromParent();
1420 };
1421
1422 return true;
1423}
1424
1425bool CombinerHelper::matchCombineIndexedLoadStore(
1426 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1427 auto &LdSt = cast<GLoadStore>(MI);
1428
1429 if (LdSt.isAtomic())
1430 return false;
1431
1432 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1433 MatchInfo.Offset);
1434 if (!MatchInfo.IsPre &&
1435 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1436 MatchInfo.Offset, MatchInfo.RematOffset))
1437 return false;
1438
1439 return true;
1440}
1441
1442void CombinerHelper::applyCombineIndexedLoadStore(
1443 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1444 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1445 unsigned Opcode = MI.getOpcode();
1446 bool IsStore = Opcode == TargetOpcode::G_STORE;
1447 unsigned NewOpcode = getIndexedOpc(Opcode);
1448
1449 // If the offset constant didn't happen to dominate the load/store, we can
1450 // just clone it as needed.
1451 if (MatchInfo.RematOffset) {
1452 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1453 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1454 *OldCst->getOperand(1).getCImm());
1455 MatchInfo.Offset = NewCst.getReg(0);
1456 }
1457
1458 auto MIB = Builder.buildInstr(NewOpcode);
1459 if (IsStore) {
1460 MIB.addDef(MatchInfo.Addr);
1461 MIB.addUse(MI.getOperand(0).getReg());
1462 } else {
1463 MIB.addDef(MI.getOperand(0).getReg());
1464 MIB.addDef(MatchInfo.Addr);
1465 }
1466
1467 MIB.addUse(MatchInfo.Base);
1468 MIB.addUse(MatchInfo.Offset);
1469 MIB.addImm(MatchInfo.IsPre);
1470 MIB->cloneMemRefs(*MI.getMF(), MI);
1471 MI.eraseFromParent();
1472 AddrDef.eraseFromParent();
1473
1474 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1475}
1476
1477bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
1478 MachineInstr *&OtherMI) {
1479 unsigned Opcode = MI.getOpcode();
1480 bool IsDiv, IsSigned;
1481
1482 switch (Opcode) {
1483 default:
1484 llvm_unreachable("Unexpected opcode!");
1485 case TargetOpcode::G_SDIV:
1486 case TargetOpcode::G_UDIV: {
1487 IsDiv = true;
1488 IsSigned = Opcode == TargetOpcode::G_SDIV;
1489 break;
1490 }
1491 case TargetOpcode::G_SREM:
1492 case TargetOpcode::G_UREM: {
1493 IsDiv = false;
1494 IsSigned = Opcode == TargetOpcode::G_SREM;
1495 break;
1496 }
1497 }
1498
1499 Register Src1 = MI.getOperand(1).getReg();
1500 unsigned DivOpcode, RemOpcode, DivremOpcode;
1501 if (IsSigned) {
1502 DivOpcode = TargetOpcode::G_SDIV;
1503 RemOpcode = TargetOpcode::G_SREM;
1504 DivremOpcode = TargetOpcode::G_SDIVREM;
1505 } else {
1506 DivOpcode = TargetOpcode::G_UDIV;
1507 RemOpcode = TargetOpcode::G_UREM;
1508 DivremOpcode = TargetOpcode::G_UDIVREM;
1509 }
1510
1511 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1512 return false;
1513
1514 // Combine:
1515 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1516 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1517 // into:
1518 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1519
1520 // Combine:
1521 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1522 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1523 // into:
1524 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1525
1526 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1527 if (MI.getParent() == UseMI.getParent() &&
1528 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1529 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1530 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1531 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1532 OtherMI = &UseMI;
1533 return true;
1534 }
1535 }
1536
1537 return false;
1538}
1539
1540void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1541 MachineInstr *&OtherMI) {
1542 unsigned Opcode = MI.getOpcode();
1543 assert(OtherMI && "OtherMI shouldn't be empty.");
1544
1545 Register DestDivReg, DestRemReg;
1546 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1547 DestDivReg = MI.getOperand(0).getReg();
1548 DestRemReg = OtherMI->getOperand(0).getReg();
1549 } else {
1550 DestDivReg = OtherMI->getOperand(0).getReg();
1551 DestRemReg = MI.getOperand(0).getReg();
1552 }
1553
1554 bool IsSigned =
1555 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1556
1557 // Check which instruction is first in the block so we don't break def-use
1558 // deps by "moving" the instruction incorrectly. Also keep track of which
1559 // instruction is first so we pick its operands, avoiding use-before-def
1560 // bugs.
1561 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1562 Builder.setInstrAndDebugLoc(*FirstInst);
1563
1564 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1565 : TargetOpcode::G_UDIVREM,
1566 {DestDivReg, DestRemReg},
1567 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1568 MI.eraseFromParent();
1569 OtherMI->eraseFromParent();
1570}
1571
1571
1572bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
1573 MachineInstr *&BrCond) {
1574 assert(MI.getOpcode() == TargetOpcode::G_BR);
1575
1576 // Try to match the following:
1577 // bb1:
1578 // G_BRCOND %c1, %bb2
1579 // G_BR %bb3
1580 // bb2:
1581 // ...
1582 // bb3:
1583
1584 // The above pattern does not have a fall through to the successor bb2, always
1585 // resulting in a branch no matter which path is taken. Here we try to find
1586 // and replace that pattern with conditional branch to bb3 and otherwise
1587 // fallthrough to bb2. This is generally better for branch predictors.
1588
1589 MachineBasicBlock *MBB = MI.getParent();
1590 MachineBasicBlock::iterator BrIt(MI);
1591 if (BrIt == MBB->begin())
1592 return false;
1593 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1594
1595 BrCond = &*std::prev(BrIt);
1596 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1597 return false;
1598
1599 // Check that the next block is the conditional branch target. Also make sure
1600 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1601 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1602 return BrCondTarget != MI.getOperand(0).getMBB() &&
1603 MBB->isLayoutSuccessor(BrCondTarget);
1604}
1605
1606void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
1607 MachineInstr *&BrCond) {
1608 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1609 Builder.setInstrAndDebugLoc(*BrCond);
1610 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1611 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1612 // this to i1 only since we might not know for sure what kind of
1613 // compare generated the condition value.
1614 auto True = Builder.buildConstant(
1615 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1616 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1617
1618 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1619 Observer.changingInstr(MI);
1620 MI.getOperand(0).setMBB(FallthroughBB);
1621 Observer.changedInstr(MI);
1622
1623 // Change the conditional branch to use the inverted condition and
1624 // new target block.
1625 Observer.changingInstr(*BrCond);
1626 BrCond->getOperand(0).setReg(Xor.getReg(0));
1627 BrCond->getOperand(1).setMBB(BrTarget);
1628 Observer.changedInstr(*BrCond);
1629}
1630
1631
1632bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
1633 MachineIRBuilder HelperBuilder(MI);
1634 GISelObserverWrapper DummyObserver;
1635 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1636 return Helper.lowerMemcpyInline(MI) ==
1637 LegalizerHelper::LegalizeResult::Legalized;
1638}
1639
1640bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1641 MachineIRBuilder HelperBuilder(MI);
1642 GISelObserverWrapper DummyObserver;
1643 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1644 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1645 LegalizerHelper::LegalizeResult::Legalized;
1646}
1647
1648static APFloat constantFoldFpUnary(const MachineInstr &MI,
1649 const MachineRegisterInfo &MRI,
1650 const APFloat &Val) {
1651 APFloat Result(Val);
1652 switch (MI.getOpcode()) {
1653 default:
1654 llvm_unreachable("Unexpected opcode!");
1655 case TargetOpcode::G_FNEG: {
1656 Result.changeSign();
1657 return Result;
1658 }
1659 case TargetOpcode::G_FABS: {
1660 Result.clearSign();
1661 return Result;
1662 }
1663 case TargetOpcode::G_FPTRUNC: {
1664 bool Unused;
1665 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1666 Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
1667 &Unused);
1668 return Result;
1669 }
1670 case TargetOpcode::G_FSQRT: {
1671 bool Unused;
1672 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1673 &Unused);
1674 Result = APFloat(sqrt(Result.convertToDouble()));
1675 break;
1676 }
1677 case TargetOpcode::G_FLOG2: {
1678 bool Unused;
1679 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1680 &Unused);
1681 Result = APFloat(log2(Result.convertToDouble()));
1682 break;
1683 }
1684 }
1685 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1686 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and
1687 // `G_FLOG2` reach here.
1688 bool Unused;
1689 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1690 return Result;
1691}
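// For example, folding G_FABS of the constant -2.0 yields 2.0, and folding
// G_FLOG2 of 8.0 yields 3.0 (computed in double precision and converted back
// to the semantics of the original constant).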
1692
1693void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1694 const ConstantFP *Cst) {
1695 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1696 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1697 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1698 MI.eraseFromParent();
1699}
1700
1701bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1702 PtrAddChain &MatchInfo) {
1703 // We're trying to match the following pattern:
1704 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1705 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1706 // -->
1707 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1708
1709 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1710 return false;
1711
1712 Register Add2 = MI.getOperand(1).getReg();
1713 Register Imm1 = MI.getOperand(2).getReg();
1714 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1715 if (!MaybeImmVal)
1716 return false;
1717
1718 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1719 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1720 return false;
1721
1722 Register Base = Add2Def->getOperand(1).getReg();
1723 Register Imm2 = Add2Def->getOperand(2).getReg();
1724 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1725 if (!MaybeImm2Val)
1726 return false;
1727
1728 // Check if the new combined immediate forms an illegal addressing mode.
1729 // Do not combine if it was legal before but would get illegal.
1730 // To do so, we need to find a load/store user of the pointer to get
1731 // the access type.
1732 Type *AccessTy = nullptr;
1733 auto &MF = *MI.getMF();
1734 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1735 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1736 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1737 MF.getFunction().getContext());
1738 break;
1739 }
1740 }
1741 TargetLoweringBase::AddrMode AMNew;
1742 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1743 AMNew.BaseOffs = CombinedImm.getSExtValue();
1744 if (AccessTy) {
1745 AMNew.HasBaseReg = true;
1746 TargetLoweringBase::AddrMode AMOld;
1747 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1748 AMOld.HasBaseReg = true;
1749 unsigned AS = MRI.getType(Add2).getAddressSpace();
1750 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1751 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1752 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1753 return false;
1754 }
1755
1756 // Pass the combined immediate to the apply function.
1757 MatchInfo.Imm = AMNew.BaseOffs;
1758 MatchInfo.Base = Base;
1759 MatchInfo.Bank = getRegBank(Imm2);
1760 return true;
1761}
1762
1763void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1764 PtrAddChain &MatchInfo) {
1765 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1766 MachineIRBuilder MIB(MI);
1767 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1768 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1769 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1770 Observer.changingInstr(MI);
1771 MI.getOperand(1).setReg(MatchInfo.Base);
1772 MI.getOperand(2).setReg(NewOffset.getReg(0));
1773 Observer.changedInstr(MI);
1774}
1775
1776bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1777 RegisterImmPair &MatchInfo) {
1778 // We're trying to match the following pattern with any of
1779 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1780 // %t1 = SHIFT %base, G_CONSTANT imm1
1781 // %root = SHIFT %t1, G_CONSTANT imm2
1782 // -->
1783 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
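// Illustrative sketch with concrete amounts (assumed MIR, not from a test):
// on an s32 value, SHIFT (SHIFT %x, 2), 3 becomes SHIFT %x, 5. If the summed
// amount reaches the scalar width, the apply step below folds G_SHL/G_LSHR to
// a zero constant and clamps G_ASHR/G_SSHLSAT to width - 1.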
1784
1785 unsigned Opcode = MI.getOpcode();
1786 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1787 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1788 Opcode == TargetOpcode::G_USHLSAT) &&
1789 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1790
1791 Register Shl2 = MI.getOperand(1).getReg();
1792 Register Imm1 = MI.getOperand(2).getReg();
1793 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1794 if (!MaybeImmVal)
1795 return false;
1796
1797 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1798 if (Shl2Def->getOpcode() != Opcode)
1799 return false;
1800
1801 Register Base = Shl2Def->getOperand(1).getReg();
1802 Register Imm2 = Shl2Def->getOperand(2).getReg();
1803 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1804 if (!MaybeImm2Val)
1805 return false;
1806
1807 // Pass the combined immediate to the apply function.
1808 MatchInfo.Imm =
1809 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1810 MatchInfo.Reg = Base;
1811
1812 // There is no simple replacement for a saturating unsigned left shift that
1813 // exceeds the scalar size.
1814 if (Opcode == TargetOpcode::G_USHLSAT &&
1815 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1816 return false;
1817
1818 return true;
1819}
1820
1821void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1822 RegisterImmPair &MatchInfo) {
1823 unsigned Opcode = MI.getOpcode();
1824 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1825 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1826 Opcode == TargetOpcode::G_USHLSAT) &&
1827 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1828
1829 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1830 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1831 auto Imm = MatchInfo.Imm;
1832
1833 if (Imm >= ScalarSizeInBits) {
1834 // Any logical shift that exceeds scalar size will produce zero.
1835 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1836 Builder.buildConstant(MI.getOperand(0), 0);
1837 MI.eraseFromParent();
1838 return;
1839 }
1840 // Arithmetic shift and saturating signed left shift have no effect beyond
1841 // scalar size.
1842 Imm = ScalarSizeInBits - 1;
1843 }
1844
1845 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1846 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1847 Observer.changingInstr(MI);
1848 MI.getOperand(1).setReg(MatchInfo.Reg);
1849 MI.getOperand(2).setReg(NewImm);
1850 Observer.changedInstr(MI);
1851}
1852
1853bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1854 ShiftOfShiftedLogic &MatchInfo) {
1855 // We're trying to match the following pattern with any of
1856 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1857 // with any of G_AND/G_OR/G_XOR logic instructions.
1858 // %t1 = SHIFT %X, G_CONSTANT C0
1859 // %t2 = LOGIC %t1, %Y
1860 // %root = SHIFT %t2, G_CONSTANT C1
1861 // -->
1862 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1863 // %t4 = SHIFT %Y, G_CONSTANT C1
1864 // %root = LOGIC %t3, %t4
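// Illustrative sketch with concrete constants (assumed MIR, not from a test):
//   %t1 = G_SHL %x, 2
//   %t2 = G_AND %t1, %y
//   %root = G_SHL %t2, 3
// becomes
//   %t3 = G_SHL %x, 5
//   %t4 = G_SHL %y, 3
//   %root = G_AND %t3, %t4
// as long as the summed amount 5 is still below the scalar bit width.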
1865 unsigned ShiftOpcode = MI.getOpcode();
1866 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1867 ShiftOpcode == TargetOpcode::G_ASHR ||
1868 ShiftOpcode == TargetOpcode::G_LSHR ||
1869 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1870 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1871 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1872
1873 // Match a one-use bitwise logic op.
1874 Register LogicDest = MI.getOperand(1).getReg();
1875 if (!MRI.hasOneNonDBGUse(LogicDest))
1876 return false;
1877
1878 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1879 unsigned LogicOpcode = LogicMI->getOpcode();
1880 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1881 LogicOpcode != TargetOpcode::G_XOR)
1882 return false;
1883
1884 // Find a matching one-use shift by constant.
1885 const Register C1 = MI.getOperand(2).getReg();
1886 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1887 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1888 return false;
1889
1890 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1891
1892 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1893 // Shift should match the previous one and should have only one use.
1894 if (MI->getOpcode() != ShiftOpcode ||
1895 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1896 return false;
1897
1898 // Must be a constant.
1899 auto MaybeImmVal =
1900 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1901 if (!MaybeImmVal)
1902 return false;
1903
1904 ShiftVal = MaybeImmVal->Value.getSExtValue();
1905 return true;
1906 };
1907
1908 // Logic ops are commutative, so check each operand for a match.
1909 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1910 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1911 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1912 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1913 uint64_t C0Val;
1914
1915 if (matchFirstShift(LogicMIOp1, C0Val)) {
1916 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1917 MatchInfo.Shift2 = LogicMIOp1;
1918 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1919 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1920 MatchInfo.Shift2 = LogicMIOp2;
1921 } else
1922 return false;
1923
1924 MatchInfo.ValSum = C0Val + C1Val;
1925
1926 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1927 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1928 return false;
1929
1930 MatchInfo.Logic = LogicMI;
1931 return true;
1932}
1933
1934void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1935 ShiftOfShiftedLogic &MatchInfo) {
1936 unsigned Opcode = MI.getOpcode();
1937 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1938 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1939 Opcode == TargetOpcode::G_SSHLSAT) &&
1940 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1941
1942 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1943 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1944
1945 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1946
1947 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1948 Register Shift1 =
1949 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1950
1951 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
1952 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
1953 // old shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at
1954 // the end would actually remove the old shift1 and cause a crash later on.
1955 // Erase it earlier to avoid the crash.
1956 MatchInfo.Shift2->eraseFromParent();
1957
1958 Register Shift2Const = MI.getOperand(2).getReg();
1959 Register Shift2 = Builder
1960 .buildInstr(Opcode, {DestType},
1961 {MatchInfo.LogicNonShiftReg, Shift2Const})
1962 .getReg(0);
1963
1964 Register Dest = MI.getOperand(0).getReg();
1965 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1966
1967 // This had only one use, so it's safe to remove it.
1968 MatchInfo.Logic->eraseFromParent();
1969
1970 MI.eraseFromParent();
1971}
1972
1973bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
1974 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1975 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1976 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
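// Illustrative sketch with concrete constants (assumed MIR, not from a test):
// (shl (add %x, 4), 2) becomes (add (shl %x, 2), (shl 4, 2)), i.e. the
// constant operand is pre-shifted so the add/or happens after the shift.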
1977 auto &Shl = cast<GenericMachineInstr>(MI);
1978 Register DstReg = Shl.getReg(0);
1979 Register SrcReg = Shl.getReg(1);
1980 Register ShiftReg = Shl.getReg(2);
1981 Register X, C1;
1982
1983 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1984 return false;
1985
1986 if (!mi_match(SrcReg, MRI,
1987 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1988 m_GOr(m_Reg(X), m_Reg(C1))))))
1989 return false;
1990
1991 APInt C1Val, C2Val;
1992 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
1993 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
1994 return false;
1995
1996 auto *SrcDef = MRI.getVRegDef(SrcReg);
1997 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
1998 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
1999 LLT SrcTy = MRI.getType(SrcReg);
2000 MatchInfo = [=](MachineIRBuilder &B) {
2001 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2002 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2003 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2004 };
2005 return true;
2006}
2007
2008bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
2009 unsigned &ShiftVal) {
2010 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2011 auto MaybeImmVal =
2012 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2013 if (!MaybeImmVal)
2014 return false;
2015
2016 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2017 return (static_cast<int32_t>(ShiftVal) != -1);
2018}
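// Illustrative sketch (assumed MIR, not from a test): a multiply by a
// power-of-two constant such as
//   %r:_(s32) = G_MUL %x, 8
// is turned by the apply step below into
//   %r:_(s32) = G_SHL %x, 3
// exactLogBase2() returns -1 for non-power-of-two constants, which rejects
// the match above.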
2019
2020void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
2021 unsigned &ShiftVal) {
2022 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2023 MachineIRBuilder MIB(MI);
2024 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2025 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2026 Observer.changingInstr(MI);
2027 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2028 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2029 Observer.changedInstr(MI);
2030}
2031
2032// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
2033bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
2034 RegisterImmPair &MatchData) {
2035 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
2036 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2037 return false;
2038
2039 Register LHS = MI.getOperand(1).getReg();
2040
2041 Register ExtSrc;
2042 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2043 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2044 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2045 return false;
2046
2047 Register RHS = MI.getOperand(2).getReg();
2048 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2049 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2050 if (!MaybeShiftAmtVal)
2051 return false;
2052
2053 if (LI) {
2054 LLT SrcTy = MRI.getType(ExtSrc);
2055
2056 // We only really care about legality for the shifted value. We can
2057 // pick any type for the constant shift amount, so ask the target what to
2058 // use. Otherwise we would have to guess and hope it is reported as legal.
2059 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2060 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2061 return false;
2062 }
2063
2064 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2065 MatchData.Reg = ExtSrc;
2066 MatchData.Imm = ShiftAmt;
2067
2068 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
2069 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2070 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2071}
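// Illustrative sketch (assumed MIR, not from a test): with at least 24 known
// leading zero bits in %x,
//   %e:_(s64) = G_ZEXT %x:_(s32)
//   %r:_(s64) = G_SHL %e, 8
// can instead shift in the narrow type and extend afterwards:
//   %s:_(s32) = G_SHL %x, 8
//   %r:_(s64) = G_ZEXT %s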
2072
2073void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
2074 const RegisterImmPair &MatchData) {
2075 Register ExtSrcReg = MatchData.Reg;
2076 int64_t ShiftAmtVal = MatchData.Imm;
2077
2078 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2079 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2080 auto NarrowShift =
2081 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2082 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2083 MI.eraseFromParent();
2084}
2085
2086bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2087 Register &MatchInfo) {
2088 GMerge &Merge = cast<GMerge>(MI);
2089 SmallVector<Register, 16> MergedValues;
2090 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2091 MergedValues.emplace_back(Merge.getSourceReg(I));
2092
2093 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2094 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2095 return false;
2096
2097 for (unsigned I = 0; I < MergedValues.size(); ++I)
2098 if (MergedValues[I] != Unmerge->getReg(I))
2099 return false;
2100
2101 MatchInfo = Unmerge->getSourceReg();
2102 return true;
2103}
2104
2105static Register peekThroughBitcast(Register Reg,
2106 const MachineRegisterInfo &MRI) {
2107 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2108 ;
2109
2110 return Reg;
2111}
2112
2113bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2114 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2115 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2116 "Expected an unmerge");
2117 auto &Unmerge = cast<GUnmerge>(MI);
2118 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2119
2120 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2121 if (!SrcInstr)
2122 return false;
2123
2124 // Check the source type of the merge.
2125 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2126 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2127 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2128 if (SrcMergeTy != Dst0Ty && !SameSize)
2129 return false;
2130 // They are the same now (modulo a bitcast).
2131 // We can collect all the src registers.
2132 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2133 Operands.push_back(SrcInstr->getSourceReg(Idx));
2134 return true;
2135}
2136
2137void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2138 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2139 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2140 "Expected an unmerge");
2141 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2142 "Not enough operands to replace all defs");
2143 unsigned NumElems = MI.getNumOperands() - 1;
2144
2145 LLT SrcTy = MRI.getType(Operands[0]);
2146 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2147 bool CanReuseInputDirectly = DstTy == SrcTy;
2148 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2149 Register DstReg = MI.getOperand(Idx).getReg();
2150 Register SrcReg = Operands[Idx];
2151
2152 // This combine may run after RegBankSelect, so we need to be aware of
2153 // register banks.
2154 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2155 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2156 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2157 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2158 }
2159
2160 if (CanReuseInputDirectly)
2161 replaceRegWith(MRI, DstReg, SrcReg);
2162 else
2163 Builder.buildCast(DstReg, SrcReg);
2164 }
2165 MI.eraseFromParent();
2166}
2167
2168bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2169 SmallVectorImpl<APInt> &Csts) {
2170 unsigned SrcIdx = MI.getNumOperands() - 1;
2171 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2172 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2173 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2174 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2175 return false;
2176 // Break down the big constant into smaller ones.
2177 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2178 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2179 ? CstVal.getCImm()->getValue()
2180 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2181
2182 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2183 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2184 // Unmerge a constant.
2185 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2186 Csts.emplace_back(Val.trunc(ShiftAmt));
2187 Val = Val.lshr(ShiftAmt);
2188 }
2189
2190 return true;
2191}
2192
2193void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2194 SmallVectorImpl<APInt> &Csts) {
2195 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2196 "Expected an unmerge");
2197 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2198 "Not enough operands to replace all defs");
2199 unsigned NumElems = MI.getNumOperands() - 1;
2200 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2201 Register DstReg = MI.getOperand(Idx).getReg();
2202 Builder.buildConstant(DstReg, Csts[Idx]);
2203 }
2204
2205 MI.eraseFromParent();
2206}
2207
2208bool CombinerHelper::matchCombineUnmergeUndef(
2209 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2210 unsigned SrcIdx = MI.getNumOperands() - 1;
2211 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2212 MatchInfo = [&MI](MachineIRBuilder &B) {
2213 unsigned NumElems = MI.getNumOperands() - 1;
2214 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2215 Register DstReg = MI.getOperand(Idx).getReg();
2216 B.buildUndef(DstReg);
2217 }
2218 };
2219 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2220}
2221
2222bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2223 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2224 "Expected an unmerge");
2225 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2226 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2227 return false;
2228 // Check that all the lanes are dead except the first one.
2229 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2230 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2231 return false;
2232 }
2233 return true;
2234}
2235
2236void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2237 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2238 Register Dst0Reg = MI.getOperand(0).getReg();
2239 Builder.buildTrunc(Dst0Reg, SrcReg);
2240 MI.eraseFromParent();
2241}
2242
2243bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2244 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2245 "Expected an unmerge");
2246 Register Dst0Reg = MI.getOperand(0).getReg();
2247 LLT Dst0Ty = MRI.getType(Dst0Reg);
2248 // G_ZEXT on a vector applies to each lane, so it will
2249 // affect all destinations. Therefore we won't be able
2250 // to simplify the unmerge to just the first definition.
2251 if (Dst0Ty.isVector())
2252 return false;
2253 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2254 LLT SrcTy = MRI.getType(SrcReg);
2255 if (SrcTy.isVector())
2256 return false;
2257
2258 Register ZExtSrcReg;
2259 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2260 return false;
2261
2262 // Finally, we can replace the first definition with
2263 // a zext of the source if the definition is big enough to hold
2264 // all of ZExtSrc's bits.
2265 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2266 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2267}
2268
2269void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2270 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2271 "Expected an unmerge");
2272
2273 Register Dst0Reg = MI.getOperand(0).getReg();
2274
2275 MachineInstr *ZExtInstr =
2276 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2277 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2278 "Expecting a G_ZEXT");
2279
2280 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2281 LLT Dst0Ty = MRI.getType(Dst0Reg);
2282 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2283
2284 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2285 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2286 } else {
2287 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2288 "ZExt src doesn't fit in destination");
2289 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2290 }
2291
2292 Register ZeroReg;
2293 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2294 if (!ZeroReg)
2295 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2296 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2297 }
2298 MI.eraseFromParent();
2299}
2300
2301bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2302 unsigned TargetShiftSize,
2303 unsigned &ShiftVal) {
2304 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2305 MI.getOpcode() == TargetOpcode::G_LSHR ||
2306 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2307
2308 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2309 if (Ty.isVector()) // TODO:
2310 return false;
2311
2312 // Don't narrow further than the requested size.
2313 unsigned Size = Ty.getSizeInBits();
2314 if (Size <= TargetShiftSize)
2315 return false;
2316
2317 auto MaybeImmVal =
2318 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2319 if (!MaybeImmVal)
2320 return false;
2321
2322 ShiftVal = MaybeImmVal->Value.getSExtValue();
2323 return ShiftVal >= Size / 2 && ShiftVal < Size;
2324}
2325
2326void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2327 const unsigned &ShiftVal) {
2328 Register DstReg = MI.getOperand(0).getReg();
2329 Register SrcReg = MI.getOperand(1).getReg();
2330 LLT Ty = MRI.getType(SrcReg);
2331 unsigned Size = Ty.getSizeInBits();
2332 unsigned HalfSize = Size / 2;
2333 assert(ShiftVal >= HalfSize);
2334
2335 LLT HalfTy = LLT::scalar(HalfSize);
2336
2337 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2338 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2339
2340 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2341 Register Narrowed = Unmerge.getReg(1);
2342
2343 // dst = G_LSHR s64:x, C for C >= 32
2344 // =>
2345 // lo, hi = G_UNMERGE_VALUES x
2346 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2347
2348 if (NarrowShiftAmt != 0) {
2349 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2350 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2351 }
2352
2353 auto Zero = Builder.buildConstant(HalfTy, 0);
2354 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2355 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2356 Register Narrowed = Unmerge.getReg(0);
2357 // dst = G_SHL s64:x, C for C >= 32
2358 // =>
2359 // lo, hi = G_UNMERGE_VALUES x
2360 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2361 if (NarrowShiftAmt != 0) {
2362 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2363 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2364 }
2365
2366 auto Zero = Builder.buildConstant(HalfTy, 0);
2367 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2368 } else {
2369 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2370 auto Hi = Builder.buildAShr(
2371 HalfTy, Unmerge.getReg(1),
2372 Builder.buildConstant(HalfTy, HalfSize - 1));
2373
2374 if (ShiftVal == HalfSize) {
2375 // (G_ASHR i64:x, 32) ->
2376 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2377 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2378 } else if (ShiftVal == Size - 1) {
2379 // Don't need a second shift.
2380 // (G_ASHR i64:x, 63) ->
2381 // %narrowed = (G_ASHR hi_32(x), 31)
2382 // G_MERGE_VALUES %narrowed, %narrowed
2383 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2384 } else {
2385 auto Lo = Builder.buildAShr(
2386 HalfTy, Unmerge.getReg(1),
2387 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2388
2389 // (G_ASHR i64:x, C) ->, for C >= 32
2390 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2391 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2392 }
2393 }
2394
2395 MI.eraseFromParent();
2396}
2397
2398bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2399 unsigned TargetShiftAmount) {
2400 unsigned ShiftAmt;
2401 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2402 applyCombineShiftToUnmerge(MI, ShiftAmt);
2403 return true;
2404 }
2405
2406 return false;
2407}
2408
2409bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2410 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2411 Register DstReg = MI.getOperand(0).getReg();
2412 LLT DstTy = MRI.getType(DstReg);
2413 Register SrcReg = MI.getOperand(1).getReg();
2414 return mi_match(SrcReg, MRI,
2415 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2416}
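// Illustrative sketch (assumed MIR, not from a test):
//   %i:_(s64) = G_PTRTOINT %p:_(p0)
//   %q:_(p0) = G_INTTOPTR %i
// is replaced by a plain copy of %p, since the round trip through a
// same-sized integer type is a no-op.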
2417
2418void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2419 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2420 Register DstReg = MI.getOperand(0).getReg();
2421 Builder.buildCopy(DstReg, Reg);
2422 MI.eraseFromParent();
2423}
2424
2425void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2426 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2427 Register DstReg = MI.getOperand(0).getReg();
2428 Builder.buildZExtOrTrunc(DstReg, Reg);
2429 MI.eraseFromParent();
2430}
2431
2432bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2433 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2434 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2435 Register LHS = MI.getOperand(1).getReg();
2436 Register RHS = MI.getOperand(2).getReg();
2437 LLT IntTy = MRI.getType(LHS);
2438
2439 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2440 // instruction.
2441 PtrReg.second = false;
2442 for (Register SrcReg : {LHS, RHS}) {
2443 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2444 // Don't handle cases where the integer is implicitly converted to the
2445 // pointer width.
2446 LLT PtrTy = MRI.getType(PtrReg.first);
2447 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2448 return true;
2449 }
2450
2451 PtrReg.second = true;
2452 }
2453
2454 return false;
2455}
2456
2457void CombinerHelper::applyCombineAddP2IToPtrAdd(
2458 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2459 Register Dst = MI.getOperand(0).getReg();
2460 Register LHS = MI.getOperand(1).getReg();
2461 Register RHS = MI.getOperand(2).getReg();
2462
2463 const bool DoCommute = PtrReg.second;
2464 if (DoCommute)
2465 std::swap(LHS, RHS);
2466 LHS = PtrReg.first;
2467
2468 LLT PtrTy = MRI.getType(LHS);
2469
2470 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2471 Builder.buildPtrToInt(Dst, PtrAdd);
2472 MI.eraseFromParent();
2473}
2474
2475bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2476 APInt &NewCst) {
2477 auto &PtrAdd = cast<GPtrAdd>(MI);
2478 Register LHS = PtrAdd.getBaseReg();
2479 Register RHS = PtrAdd.getOffsetReg();
2480 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2481
2482 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2483 APInt Cst;
2484 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2485 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2486 // G_INTTOPTR uses zero-extension
2487 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2488 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2489 return true;
2490 }
2491 }
2492
2493 return false;
2494}
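// Illustrative sketch with concrete constants (assumed values, not from a
// test): G_PTR_ADD (G_INTTOPTR 0x1000), 16 folds to the constant 0x1010,
// with the base zero-extended and the offset sign-extended to the pointer
// width as above.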
2495
2496void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2497 APInt &NewCst) {
2498 auto &PtrAdd = cast<GPtrAdd>(MI);
2499 Register Dst = PtrAdd.getReg(0);
2500
2501 Builder.buildConstant(Dst, NewCst);
2502 PtrAdd.eraseFromParent();
2503}
2504
2505bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2506 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2507 Register DstReg = MI.getOperand(0).getReg();
2508 Register SrcReg = MI.getOperand(1).getReg();
2509 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2510 if (OriginalSrcReg.isValid())
2511 SrcReg = OriginalSrcReg;
2512 LLT DstTy = MRI.getType(DstReg);
2513 return mi_match(SrcReg, MRI,
2514 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2515}
2516
2517bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2518 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2519 Register DstReg = MI.getOperand(0).getReg();
2520 Register SrcReg = MI.getOperand(1).getReg();
2521 LLT DstTy = MRI.getType(DstReg);
2522 if (mi_match(SrcReg, MRI,
2523 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2524 unsigned DstSize = DstTy.getScalarSizeInBits();
2525 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2526 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2527 }
2528 return false;
2529}
2530
2531bool CombinerHelper::matchCombineExtOfExt(
2532 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2533 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2534 MI.getOpcode() == TargetOpcode::G_SEXT ||
2535 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2536 "Expected a G_[ASZ]EXT");
2537 Register SrcReg = MI.getOperand(1).getReg();
2538 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2539 if (OriginalSrcReg.isValid())
2540 SrcReg = OriginalSrcReg;
2541 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2542 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2543 unsigned Opc = MI.getOpcode();
2544 unsigned SrcOpc = SrcMI->getOpcode();
2545 if (Opc == SrcOpc ||
2546 (Opc == TargetOpcode::G_ANYEXT &&
2547 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2548 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2549 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2550 return true;
2551 }
2552 return false;
2553}
2554
2555void CombinerHelper::applyCombineExtOfExt(
2556 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2557 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2558 MI.getOpcode() == TargetOpcode::G_SEXT ||
2559 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2560 "Expected a G_[ASZ]EXT");
2561
2562 Register Reg = std::get<0>(MatchInfo);
2563 unsigned SrcExtOp = std::get<1>(MatchInfo);
2564
2565 // Combine exts with the same opcode.
2566 if (MI.getOpcode() == SrcExtOp) {
2567 Observer.changingInstr(MI);
2568 MI.getOperand(1).setReg(Reg);
2569 Observer.changedInstr(MI);
2570 return;
2571 }
2572
2573 // Combine:
2574 // - anyext([sz]ext x) to [sz]ext x
2575 // - sext(zext x) to zext x
2576 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2577 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2578 SrcExtOp == TargetOpcode::G_ZEXT)) {
2579 Register DstReg = MI.getOperand(0).getReg();
2580 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2581 MI.eraseFromParent();
2582 }
2583}
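// Illustrative sketch (assumed MIR, not from a test):
//   %a:_(s16) = G_ZEXT %x:_(s8)
//   %b:_(s32) = G_SEXT %a
// becomes a single %b:_(s32) = G_ZEXT %x, because the inner zext already
// guarantees the sign bit of %a is zero.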
2584
2585bool CombinerHelper::matchCombineTruncOfExt(
2586 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2587 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2588 Register SrcReg = MI.getOperand(1).getReg();
2589 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2590 unsigned SrcOpc = SrcMI->getOpcode();
2591 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2592 SrcOpc == TargetOpcode::G_ZEXT) {
2593 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2594 return true;
2595 }
2596 return false;
2597}
2598
2599void CombinerHelper::applyCombineTruncOfExt(
2600 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2601 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2602 Register SrcReg = MatchInfo.first;
2603 unsigned SrcExtOp = MatchInfo.second;
2604 Register DstReg = MI.getOperand(0).getReg();
2605 LLT SrcTy = MRI.getType(SrcReg);
2606 LLT DstTy = MRI.getType(DstReg);
2607 if (SrcTy == DstTy) {
2608 MI.eraseFromParent();
2609 replaceRegWith(MRI, DstReg, SrcReg);
2610 return;
2611 }
2612 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2613 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2614 else
2615 Builder.buildTrunc(DstReg, SrcReg);
2616 MI.eraseFromParent();
2617}
2618
2619static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2620 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2621 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2622
2623 // ShiftTy > 32 > TruncTy -> 32
2624 if (ShiftSize > 32 && TruncSize < 32)
2625 return ShiftTy.changeElementSize(32);
2626
2627 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2628 // Some targets like it, some don't, some only like it under certain
2629 // conditions/processor versions, etc.
2630 // A TL hook might be needed for this.
2631
2632 // Don't combine
2633 return ShiftTy;
2634}
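// Illustrative sketch (assumed types, not from a test): for
// G_TRUNC (G_LSHR %x:_(s64), %amt) truncated down to s16, the helper above
// picks s32 as the intermediate type, so the shift is performed on s32 and
// only then truncated to s16.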
2635
2637 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2638 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2639 Register DstReg = MI.getOperand(0).getReg();
2640 Register SrcReg = MI.getOperand(1).getReg();
2641
2642 if (!MRI.hasOneNonDBGUse(SrcReg))
2643 return false;
2644
2645 LLT SrcTy = MRI.getType(SrcReg);
2646 LLT DstTy = MRI.getType(DstReg);
2647
2648 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2649 const auto &TL = getTargetLowering();
2650
2651 LLT NewShiftTy;
2652 switch (SrcMI->getOpcode()) {
2653 default:
2654 return false;
2655 case TargetOpcode::G_SHL: {
2656 NewShiftTy = DstTy;
2657
2658 // Make sure new shift amount is legal.
2659 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2660 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2661 return false;
2662 break;
2663 }
2664 case TargetOpcode::G_LSHR:
2665 case TargetOpcode::G_ASHR: {
2666 // For right shifts, we conservatively do not do the transform if the TRUNC
2667 // has any STORE users. The reason is that if we change the type of the
2668 // shift, we may break the truncstore combine.
2669 //
2670 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2671 for (auto &User : MRI.use_instructions(DstReg))
2672 if (User.getOpcode() == TargetOpcode::G_STORE)
2673 return false;
2674
2675 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2676 if (NewShiftTy == SrcTy)
2677 return false;
2678
2679 // Make sure we won't lose information by truncating the high bits.
2680 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2681 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2682 DstTy.getScalarSizeInBits()))
2683 return false;
2684 break;
2685 }
2686 }
2687
2688 if (!isLegalOrBeforeLegalizer(
2689 {SrcMI->getOpcode(),
2690 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2691 return false;
2692
2693 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2694 return true;
2695}
2696
2697void CombinerHelper::applyCombineTruncOfShift(
2698 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2699 MachineInstr *ShiftMI = MatchInfo.first;
2700 LLT NewShiftTy = MatchInfo.second;
2701
2702 Register Dst = MI.getOperand(0).getReg();
2703 LLT DstTy = MRI.getType(Dst);
2704
2705 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2706 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2707 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2708
2709 Register NewShift =
2710 Builder
2711 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2712 .getReg(0);
2713
2714 if (NewShiftTy == DstTy)
2715 replaceRegWith(MRI, Dst, NewShift);
2716 else
2717 Builder.buildTrunc(Dst, NewShift);
2718
2719 eraseInst(MI);
2720}
2721
2722bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2723 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2724 return MO.isReg() &&
2725 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2726 });
2727}
2728
2729bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2730 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2731 return !MO.isReg() ||
2732 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2733 });
2734}
2735
2736bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2737 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2738 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2739 return all_of(Mask, [](int Elt) { return Elt < 0; });
2740}
2741
2742bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2743 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2744 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2745 MRI);
2746}
2747
2748bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2749 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2750 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2751 MRI);
2752}
2753
2754bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) {
2755 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2756 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2757 "Expected an insert/extract element op");
2758 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2759 unsigned IdxIdx =
2760 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2761 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2762 if (!Idx)
2763 return false;
2764 return Idx->getZExtValue() >= VecTy.getNumElements();
2765}
2766
2767bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2768 GSelect &SelMI = cast<GSelect>(MI);
2769 auto Cst =
2770 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2771 if (!Cst)
2772 return false;
2773 OpIdx = Cst->isZero() ? 3 : 2;
2774 return true;
2775}
2776
2777void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2778
2779bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2780 const MachineOperand &MOP2) {
2781 if (!MOP1.isReg() || !MOP2.isReg())
2782 return false;
2783 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2784 if (!InstAndDef1)
2785 return false;
2786 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2787 if (!InstAndDef2)
2788 return false;
2789 MachineInstr *I1 = InstAndDef1->MI;
2790 MachineInstr *I2 = InstAndDef2->MI;
2791
2792 // Handle a case like this:
2793 //
2794 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2795 //
2796 // Even though %0 and %1 are produced by the same instruction they are not
2797 // the same values.
2798 if (I1 == I2)
2799 return MOP1.getReg() == MOP2.getReg();
2800
2801 // If we have an instruction which loads or stores, we can't guarantee that
2802 // it is identical.
2803 //
2804 // For example, we may have
2805 //
2806 // %x1 = G_LOAD %addr (load N from @somewhere)
2807 // ...
2808 // call @foo
2809 // ...
2810 // %x2 = G_LOAD %addr (load N from @somewhere)
2811 // ...
2812 // %or = G_OR %x1, %x2
2813 //
2814 // It's possible that @foo will modify whatever lives at the address we're
2815 // loading from. To be safe, let's just assume that all loads and stores
2816 // are different (unless we have something which is guaranteed to not
2817 // change.)
2818 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2819 return false;
2820
2821 // If both instructions are loads or stores, they are equal only if both
2822 // are dereferenceable invariant loads with the same number of bits.
2823 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2824 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2825 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2826 if (!LS1 || !LS2)
2827 return false;
2828
2829 if (!I2->isDereferenceableInvariantLoad() ||
2830 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2831 return false;
2832 }
2833
2834 // Check for physical registers on the instructions first to avoid cases
2835 // like this:
2836 //
2837 // %a = COPY $physreg
2838 // ...
2839 // SOMETHING implicit-def $physreg
2840 // ...
2841 // %b = COPY $physreg
2842 //
2843 // These copies are not equivalent.
2844 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2845 return MO.isReg() && MO.getReg().isPhysical();
2846 })) {
2847 // Check if we have a case like this:
2848 //
2849 // %a = COPY $physreg
2850 // %b = COPY %a
2851 //
2852 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2853 // From that, we know that they must have the same value, since they must
2854 // have come from the same COPY.
2855 return I1->isIdenticalTo(*I2);
2856 }
2857
2858 // We don't have any physical registers, so we don't necessarily need the
2859 // same vreg defs.
2860 //
2861 // On the off-chance that there's some target instruction feeding into the
2862 // instruction, let's use produceSameValue instead of isIdenticalTo.
2863 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2864 // Handle instructions with multiple defs that produce the same values. The
2865 // values are the same for operands with the same index.
2866 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2867 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2868 // I1 and I2 are different instructions but produce the same values:
2869 // %1 and %6 are the same, while %1 and %7 are not.
2870 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2871 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2872 }
2873 return false;
2874}
2875
2876bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2877 if (!MOP.isReg())
2878 return false;
2879 auto *MI = MRI.getVRegDef(MOP.getReg());
2880 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2881 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2882 MaybeCst->getSExtValue() == C;
2883}
2884
2885bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) {
2886 if (!MOP.isReg())
2887 return false;
2888 std::optional<FPValueAndVReg> MaybeCst;
2889 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2890 return false;
2891
2892 return MaybeCst->Value.isExactlyValue(C);
2893}
2894
2895void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2896 unsigned OpIdx) {
2897 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2898 Register OldReg = MI.getOperand(0).getReg();
2899 Register Replacement = MI.getOperand(OpIdx).getReg();
2900 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2901 MI.eraseFromParent();
2902 replaceRegWith(MRI, OldReg, Replacement);
2903}
2904
2905void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2906 Register Replacement) {
2907 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2908 Register OldReg = MI.getOperand(0).getReg();
2909 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2910 MI.eraseFromParent();
2911 replaceRegWith(MRI, OldReg, Replacement);
2912}
2913
2914bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
2915 unsigned ConstIdx) {
2916 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2917 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2918
2919 // Get the shift amount
2920 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2921 if (!VRegAndVal)
2922 return false;
2923
2924 // Return true if the shift amount >= bitwidth
2925 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2926}
2927
2928void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
2929 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2930 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2931 "This is not a funnel shift operation");
2932
2933 Register ConstReg = MI.getOperand(3).getReg();
2934 LLT ConstTy = MRI.getType(ConstReg);
2935 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2936
2937 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2938 assert((VRegAndVal) && "Value is not a constant");
2939
2940 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
2941 APInt NewConst = VRegAndVal->Value.urem(
2942 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2943
2944 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2945 Builder.buildInstr(
2946 MI.getOpcode(), {MI.getOperand(0)},
2947 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2948
2949 MI.eraseFromParent();
2950}
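// Illustrative sketch with concrete constants (assumed MIR, not from a test):
// on s32 operands, G_FSHL %a, %b, 37 is rewritten as G_FSHL %a, %b, 5,
// since 37 % 32 == 5.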
2951
2953 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2954 // Match (cond ? x : x)
2955 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2956 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2957 MRI);
2958}
2959
2961 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2962 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2963 MRI);
2964}
2965
2967 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2968 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2969 MRI);
2970}
2971
2973 MachineOperand &MO = MI.getOperand(OpIdx);
2974 return MO.isReg() &&
2975 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2976}
2977
2978bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2979 unsigned OpIdx) {
2980 MachineOperand &MO = MI.getOperand(OpIdx);
2981 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2982}
2983
2984void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2985 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2986 Builder.buildFConstant(MI.getOperand(0), C);
2987 MI.eraseFromParent();
2988}
2989
2990void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2991 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2992 Builder.buildConstant(MI.getOperand(0), C);
2993 MI.eraseFromParent();
2994}
2995
2996void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
2997 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2998 Builder.buildConstant(MI.getOperand(0), C);
2999 MI.eraseFromParent();
3000}
3001
3002void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
3003 ConstantFP *CFP) {
3004 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3005 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3006 MI.eraseFromParent();
3007}
3008
3009void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
3010 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3011 Builder.buildUndef(MI.getOperand(0));
3012 MI.eraseFromParent();
3013}
3014
3015bool CombinerHelper::matchSimplifyAddToSub(
3016 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
3017 Register LHS = MI.getOperand(1).getReg();
3018 Register RHS = MI.getOperand(2).getReg();
3019 Register &NewLHS = std::get<0>(MatchInfo);
3020 Register &NewRHS = std::get<1>(MatchInfo);
3021
3022 // Helper lambda to check for opportunities for
3023 // ((0-A) + B) -> B - A
3024 // (A + (0-B)) -> A - B
3025 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3026 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3027 return false;
3028 NewLHS = MaybeNewLHS;
3029 return true;
3030 };
3031
3032 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3033}
3034
3035bool CombinerHelper::matchCombineInsertVecElts(
3036 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
3037 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3038 "Invalid opcode");
3039 Register DstReg = MI.getOperand(0).getReg();
3040 LLT DstTy = MRI.getType(DstReg);
3041 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3042 unsigned NumElts = DstTy.getNumElements();
3043 // If this MI is part of a sequence of insert_vec_elts, then
3044 // don't do the combine in the middle of the sequence.
3045 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3046 TargetOpcode::G_INSERT_VECTOR_ELT)
3047 return false;
3048 MachineInstr *CurrInst = &MI;
3049 MachineInstr *TmpInst;
3050 int64_t IntImm;
3051 Register TmpReg;
3052 MatchInfo.resize(NumElts);
3053 while (mi_match(
3054 CurrInst->getOperand(0).getReg(), MRI,
3055 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3056 if (IntImm >= NumElts || IntImm < 0)
3057 return false;
3058 if (!MatchInfo[IntImm])
3059 MatchInfo[IntImm] = TmpReg;
3060 CurrInst = TmpInst;
3061 }
3062 // Variable index.
3063 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3064 return false;
3065 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3066 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3067 if (!MatchInfo[I - 1].isValid())
3068 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3069 }
3070 return true;
3071 }
3072 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3073 // overwritten, bail out.
3074 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3075 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3076}
3077
3078void CombinerHelper::applyCombineInsertVecElts(
3079 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
3080 Register UndefReg;
3081 auto GetUndef = [&]() {
3082 if (UndefReg)
3083 return UndefReg;
3084 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3085 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3086 return UndefReg;
3087 };
3088 for (Register &Reg : MatchInfo) {
3089 if (!Reg)
3090 Reg = GetUndef();
3091 }
3092 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3093 MI.eraseFromParent();
3094}
3095
3096void CombinerHelper::applySimplifyAddToSub(
3097 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
3098 Register SubLHS, SubRHS;
3099 std::tie(SubLHS, SubRHS) = MatchInfo;
3100 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3101 MI.eraseFromParent();
3102}
3103
3104bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
3105 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3106 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3107 //
3108 // Creates the new hand + logic instructions (but does not insert them).
3109 //
3110 // On success, MatchInfo is populated with the new instructions. These are
3111 // inserted in applyHoistLogicOpWithSameOpcodeHands.
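// Illustrative sketch (assumed MIR, not from a test):
//   %a:_(s32) = G_ZEXT %x:_(s8)
//   %b:_(s32) = G_ZEXT %y:_(s8)
//   %r:_(s32) = G_AND %a, %b
// is recorded as the steps to build
//   %n:_(s8) = G_AND %x, %y
//   %r:_(s32) = G_ZEXT %n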
3112 unsigned LogicOpcode = MI.getOpcode();
3113 assert(LogicOpcode == TargetOpcode::G_AND ||
3114 LogicOpcode == TargetOpcode::G_OR ||
3115 LogicOpcode == TargetOpcode::G_XOR);
3116 MachineIRBuilder MIB(MI);
3117 Register Dst = MI.getOperand(0).getReg();
3118 Register LHSReg = MI.getOperand(1).getReg();
3119 Register RHSReg = MI.getOperand(2).getReg();
3120
3121 // Don't recompute anything.
3122 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3123 return false;
3124
3125 // Make sure we have (hand x, ...), (hand y, ...)
3126 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3127 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3128 if (!LeftHandInst || !RightHandInst)
3129 return false;
3130 unsigned HandOpcode = LeftHandInst->getOpcode();
3131 if (HandOpcode != RightHandInst->getOpcode())
3132 return false;
3133 if (!LeftHandInst->getOperand(1).isReg() ||
3134 !RightHandInst->getOperand(1).isReg())
3135 return false;
3136
3137 // Make sure the types match up, and if we're doing this post-legalization,
3138 // we end up with legal types.
3139 Register X = LeftHandInst->getOperand(1).getReg();
3140 Register Y = RightHandInst->getOperand(1).getReg();
3141 LLT XTy = MRI.getType(X);
3142 LLT YTy = MRI.getType(Y);
3143 if (!XTy.isValid() || XTy != YTy)
3144 return false;
3145
3146 // Optional extra source register.
3147 Register ExtraHandOpSrcReg;
3148 switch (HandOpcode) {
3149 default:
3150 return false;
3151 case TargetOpcode::G_ANYEXT:
3152 case TargetOpcode::G_SEXT:
3153 case TargetOpcode::G_ZEXT: {
3154 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3155 break;
3156 }
3157 case TargetOpcode::G_TRUNC: {
3158 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3159 const MachineFunction *MF = MI.getMF();
3160 const DataLayout &DL = MF->getDataLayout();
3161 LLVMContext &Ctx = MF->getFunction().getContext();
3162
3163 LLT DstTy = MRI.getType(Dst);
3164 const TargetLowering &TLI = getTargetLowering();
3165
3166 // Be extra careful sinking truncate. If it's free, there's no benefit in
3167 // widening a binop.
3168 if (TLI.isZExtFree(DstTy, XTy, DL, Ctx) &&
3169 TLI.isTruncateFree(XTy, DstTy, DL, Ctx))
3170 return false;
3171 break;
3172 }
3173 case TargetOpcode::G_AND:
3174 case TargetOpcode::G_ASHR:
3175 case TargetOpcode::G_LSHR:
3176 case TargetOpcode::G_SHL: {
3177 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3178 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3179 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3180 return false;
3181 ExtraHandOpSrcReg = ZOp.getReg();
3182 break;
3183 }
3184 }
3185
3186 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3187 return false;
3188
3189 // Record the steps to build the new instructions.
3190 //
3191 // Steps to build (logic x, y)
3192 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3193 OperandBuildSteps LogicBuildSteps = {
3194 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3195 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3196 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3197 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3198
3199 // Steps to build hand (logic x, y), ...z
3200 OperandBuildSteps HandBuildSteps = {
3201 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3202 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3203 if (ExtraHandOpSrcReg.isValid())
3204 HandBuildSteps.push_back(
3205 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3206 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3207
3208 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3209 return true;
3210}
3211
3212void CombinerHelper::applyBuildInstructionSteps(
3213 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3214 assert(MatchInfo.InstrsToBuild.size() &&
3215 "Expected at least one instr to build?");
3216 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3217 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3218 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3219 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3220 for (auto &OperandFn : InstrToBuild.OperandFns)
3221 OperandFn(Instr);
3222 }
3223 MI.eraseFromParent();
3224}
3225
3226bool CombinerHelper::matchAshrShlToSextInreg(
3227 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3228 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3229 int64_t ShlCst, AshrCst;
3230 Register Src;
3231 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3232 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3233 m_ICstOrSplat(AshrCst))))
3234 return false;
3235 if (ShlCst != AshrCst)
3236 return false;
3237 if (!isLegalOrBeforeLegalizer(
3238 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3239 return false;
3240 MatchInfo = std::make_tuple(Src, ShlCst);
3241 return true;
3242}
3243
3244void CombinerHelper::applyAshShlToSextInreg(
3245 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3246 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3247 Register Src;
3248 int64_t ShiftAmt;
3249 std::tie(Src, ShiftAmt) = MatchInfo;
3250 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3251 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3252 MI.eraseFromParent();
3253}
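// Illustrative sketch with concrete constants (assumed MIR, not from a test):
//   %t:_(s32) = G_SHL %x, 24
//   %r:_(s32) = G_ASHR %t, 24
// becomes
//   %r:_(s32) = G_SEXT_INREG %x, 8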
3254
3255/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3256bool CombinerHelper::matchOverlappingAnd(
3257 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3258 assert(MI.getOpcode() == TargetOpcode::G_AND);
3259
3260 Register Dst = MI.getOperand(0).getReg();
3261 LLT Ty = MRI.getType(Dst);
3262
3263 Register R;
3264 int64_t C1;
3265 int64_t C2;
3266 if (!mi_match(
3267 Dst, MRI,
3268 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3269 return false;
3270
3271 MatchInfo = [=](MachineIRBuilder &B) {
3272 if (C1 & C2) {
3273 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3274 return;
3275 }
3276 auto Zero = B.buildConstant(Ty, 0);
3277 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3278 };
3279 return true;
3280}
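// Illustrative sketch with concrete masks (assumed values, not from a test):
// and(and(%x, 0xFF), 0x0F) becomes and(%x, 0x0F), while
// and(and(%x, 0xF0), 0x0F) becomes the constant 0 because the masks do not
// overlap.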
3281
3282bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3283 Register &Replacement) {
3284 // Given
3285 //
3286 // %y:_(sN) = G_SOMETHING
3287 // %x:_(sN) = G_SOMETHING
3288 // %res:_(sN) = G_AND %x, %y
3289 //
3290 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3291 //
3292 // Patterns like this can appear as a result of legalization. E.g.
3293 //
3294 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3295 // %one:_(s32) = G_CONSTANT i32 1
3296 // %and:_(s32) = G_AND %cmp, %one
3297 //
3298 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3299 assert(MI.getOpcode() == TargetOpcode::G_AND);
3300 if (!KB)
3301 return false;
3302
3303 Register AndDst = MI.getOperand(0).getReg();
3304 Register LHS = MI.getOperand(1).getReg();
3305 Register RHS = MI.getOperand(2).getReg();
3306
3307 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3308 // we can't do anything. If we do, then it depends on whether we have
3309 // KnownBits on the LHS.
3310 KnownBits RHSBits = KB->getKnownBits(RHS);
3311 if (RHSBits.isUnknown())
3312 return false;
3313
3314 KnownBits LHSBits = KB->getKnownBits(LHS);
3315
3316 // Check that x & Mask == x.
3317 // x & 1 == x, always
3318 // x & 0 == x, only if x is also 0
3319 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3320 //
3321 // Check if we can replace AndDst with the LHS of the G_AND
3322 if (canReplaceReg(AndDst, LHS, MRI) &&
3323 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3324 Replacement = LHS;
3325 return true;
3326 }
3327
3328 // Check if we can replace AndDst with the RHS of the G_AND
3329 if (canReplaceReg(AndDst, RHS, MRI) &&
3330 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3331 Replacement = RHS;
3332 return true;
3333 }
3334
3335 return false;
3336}
3337
3338bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
3339 // Given
3340 //
3341 // %y:_(sN) = G_SOMETHING
3342 // %x:_(sN) = G_SOMETHING
3343 // %res:_(sN) = G_OR %x, %y
3344 //
3345 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3346 assert(MI.getOpcode() == TargetOpcode::G_OR);
3347 if (!KB)
3348 return false;
3349
3350 Register OrDst = MI.getOperand(0).getReg();
3351 Register LHS = MI.getOperand(1).getReg();
3352 Register RHS = MI.getOperand(2).getReg();
3353
3354 KnownBits LHSBits = KB->getKnownBits(LHS);
3355 KnownBits RHSBits = KB->getKnownBits(RHS);
3356
3357 // Check that x | Mask == x.
3358 // x | 0 == x, always
3359 // x | 1 == x, only if x is also 1
3360 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3361 //
3362 // Check if we can replace OrDst with the LHS of the G_OR
3363 if (canReplaceReg(OrDst, LHS, MRI) &&
3364 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3365 Replacement = LHS;
3366 return true;
3367 }
3368
3369 // Check if we can replace OrDst with the RHS of the G_OR
3370 if (canReplaceReg(OrDst, RHS, MRI) &&
3371 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3372 Replacement = RHS;
3373 return true;
3374 }
3375
3376 return false;
3377}
3378
3379bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3380 // If the input is already sign extended, just drop the extension.
3381 Register Src = MI.getOperand(1).getReg();
3382 unsigned ExtBits = MI.getOperand(2).getImm();
3383 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3384 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3385}
3386
3387static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3388 int64_t Cst, bool IsVector, bool IsFP) {
3389 // For i1, Cst will always be -1 regardless of boolean contents.
3390 return (ScalarSizeBits == 1 && Cst == -1) ||
3391 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3392}
3393
3394bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3395 SmallVectorImpl<Register> &RegsToNegate) {
3396 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3397 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3398 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3399 Register XorSrc;
3400 Register CstReg;
3401 // We match xor(src, true) here.
3402 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3403 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3404 return false;
3405
3406 if (!MRI.hasOneNonDBGUse(XorSrc))
3407 return false;
3408
3409 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3410 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3411 // list of tree nodes to visit.
3412 RegsToNegate.push_back(XorSrc);
3413 // Remember whether the comparisons are all integer or all floating point.
3414 bool IsInt = false;
3415 bool IsFP = false;
3416 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3417 Register Reg = RegsToNegate[I];
3418 if (!MRI.hasOneNonDBGUse(Reg))
3419 return false;
3420 MachineInstr *Def = MRI.getVRegDef(Reg);
3421 switch (Def->getOpcode()) {
3422 default:
3423 // Don't match if the tree contains anything other than ANDs, ORs and
3424 // comparisons.
3425 return false;
3426 case TargetOpcode::G_ICMP:
3427 if (IsFP)
3428 return false;
3429 IsInt = true;
3430 // When we apply the combine we will invert the predicate.
3431 break;
3432 case TargetOpcode::G_FCMP:
3433 if (IsInt)
3434 return false;
3435 IsFP = true;
3436 // When we apply the combine we will invert the predicate.
3437 break;
3438 case TargetOpcode::G_AND:
3439 case TargetOpcode::G_OR:
3440 // Implement De Morgan's laws:
3441 // ~(x & y) -> ~x | ~y
3442 // ~(x | y) -> ~x & ~y
3443 // When we apply the combine we will change the opcode and recursively
3444 // negate the operands.
3445 RegsToNegate.push_back(Def->getOperand(1).getReg());
3446 RegsToNegate.push_back(Def->getOperand(2).getReg());
3447 break;
3448 }
3449 }
3450
3451 // Now we know whether the comparisons are integer or floating point, check
3452 // the constant in the xor.
3453 int64_t Cst;
3454 if (Ty.isVector()) {
3455 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3456 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3457 if (!MaybeCst)
3458 return false;
3459 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3460 return false;
3461 } else {
3462 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3463 return false;
3464 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3465 return false;
3466 }
3467
3468 return true;
3469}
3470
3472 SmallVectorImpl<Register> &RegsToNegate) {
3473 for (Register Reg : RegsToNegate) {
3474 MachineInstr *Def = MRI.getVRegDef(Reg);
3475 Observer.changingInstr(*Def);
3476 // For each comparison, invert the opcode. For each AND and OR, change the
3477 // opcode.
3478 switch (Def->getOpcode()) {
3479 default:
3480 llvm_unreachable("Unexpected opcode");
3481 case TargetOpcode::G_ICMP:
3482 case TargetOpcode::G_FCMP: {
3483 MachineOperand &PredOp = Def->getOperand(1);
3484 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3485 (CmpInst::Predicate)PredOp.getPredicate());
3486 PredOp.setPredicate(NewP);
3487 break;
3488 }
3489 case TargetOpcode::G_AND:
3490 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3491 break;
3492 case TargetOpcode::G_OR:
3493 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3494 break;
3495 }
3496 Observer.changedInstr(*Def);
3497 }
3498
3499 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3500 MI.eraseFromParent();
3501}
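// Illustrative MIR sketch (hypothetical registers, not from the upstream source):
// negating
//   %c1:_(s1) = G_ICMP intpred(eq), %a, %b
//   %c2:_(s1) = G_ICMP intpred(slt), %a, %b
//   %t:_(s1) = G_AND %c1, %c2
//   %n:_(s1) = G_XOR %t, -1
// inverts both predicates (eq -> ne, slt -> sge), turns the G_AND into a G_OR
// per De Morgan, and replaces %n with %t.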
3502
3504 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3505 // Match (xor (and x, y), y) (or any of its commuted cases)
3506 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3507 Register &X = MatchInfo.first;
3508 Register &Y = MatchInfo.second;
3509 Register AndReg = MI.getOperand(1).getReg();
3510 Register SharedReg = MI.getOperand(2).getReg();
3511
3512 // Find a G_AND on either side of the G_XOR.
3513 // Look for one of
3514 //
3515 // (xor (and x, y), SharedReg)
3516 // (xor SharedReg, (and x, y))
3517 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3518 std::swap(AndReg, SharedReg);
3519 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3520 return false;
3521 }
3522
3523 // Only do this if we'll eliminate the G_AND.
3524 if (!MRI.hasOneNonDBGUse(AndReg))
3525 return false;
3526
3527 // We can combine if SharedReg is the same as either the LHS or RHS of the
3528 // G_AND.
3529 if (Y != SharedReg)
3530 std::swap(X, Y);
3531 return Y == SharedReg;
3532}
3533
3535 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3536 // Fold (xor (and x, y), y) -> (and (not x), y)
3537 Register X, Y;
3538 std::tie(X, Y) = MatchInfo;
3539 auto Not = Builder.buildNot(MRI.getType(X), X);
3541 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3542 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3543 MI.getOperand(2).setReg(Y);
3545}
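// Illustrative sketch: with y shared between the G_AND and the G_XOR,
//   (x & y) ^ y == ~x & y
// e.g. (4-bit) x = 0b1100, y = 0b1010: (x & y) ^ y = 0b1000 ^ 0b1010 = 0b0010
// and ~x & y = 0b0011 & 0b1010 = 0b0010.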
3546
3548 auto &PtrAdd = cast<GPtrAdd>(MI);
3549 Register DstReg = PtrAdd.getReg(0);
3550 LLT Ty = MRI.getType(DstReg);
3551 const DataLayout &DL = Builder.getMF().getDataLayout();
3552
3553 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3554 return false;
3555
3556 if (Ty.isPointer()) {
3557 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3558 return ConstVal && *ConstVal == 0;
3559 }
3560
3561 assert(Ty.isVector() && "Expecting a vector type");
3562 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3563 return isBuildVectorAllZeros(*VecMI, MRI);
3564}
3565
3567 auto &PtrAdd = cast<GPtrAdd>(MI);
3568 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3569 PtrAdd.eraseFromParent();
3570}
3571
3572/// The second source operand is known to be a power of 2.
3574 Register DstReg = MI.getOperand(0).getReg();
3575 Register Src0 = MI.getOperand(1).getReg();
3576 Register Pow2Src1 = MI.getOperand(2).getReg();
3577 LLT Ty = MRI.getType(DstReg);
3578
3579 // Fold (urem x, pow2) -> (and x, pow2-1)
3580 auto NegOne = Builder.buildConstant(Ty, -1);
3581 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3582 Builder.buildAnd(DstReg, Src0, Add);
3583 MI.eraseFromParent();
3584}
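// Illustrative sketch (hypothetical registers, simplified notation): for a
// power-of-two divisor the remainder is just the low bits, e.g.
// x urem 8 == x & 7, so the rewrite is
//   %m1:_(sN) = G_CONSTANT -1
//   %lo:_(sN) = G_ADD %pow2, %m1     ; pow2 - 1
//   %res:_(sN) = G_AND %x, %lo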
3585
3587 unsigned &SelectOpNo) {
3588 Register LHS = MI.getOperand(1).getReg();
3589 Register RHS = MI.getOperand(2).getReg();
3590
3591 Register OtherOperandReg = RHS;
3592 SelectOpNo = 1;
3593 MachineInstr *Select = MRI.getVRegDef(LHS);
3594
3595 // Don't do this unless the old select is going away. We want to eliminate the
3596 // binary operator, not replace a binop with a select.
3597 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3598 !MRI.hasOneNonDBGUse(LHS)) {
3599 OtherOperandReg = LHS;
3600 SelectOpNo = 2;
3601 Select = MRI.getVRegDef(RHS);
3602 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3603 !MRI.hasOneNonDBGUse(RHS))
3604 return false;
3605 }
3606
3607 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3608 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3609
3610 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3611 /*AllowFP*/ true,
3612 /*AllowOpaqueConstants*/ false))
3613 return false;
3614 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3615 /*AllowFP*/ true,
3616 /*AllowOpaqueConstants*/ false))
3617 return false;
3618
3619 unsigned BinOpcode = MI.getOpcode();
3620
3621 // We know that one of the operands is a select of constants. Now verify that
3622 // the other binary operator operand is either a constant, or we can handle a
3623 // variable.
3624 bool CanFoldNonConst =
3625 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3626 (isNullOrNullSplat(*SelectLHS, MRI) ||
3627 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3628 (isNullOrNullSplat(*SelectRHS, MRI) ||
3629 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3630 if (CanFoldNonConst)
3631 return true;
3632
3633 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3634 /*AllowFP*/ true,
3635 /*AllowOpaqueConstants*/ false);
3636}
3637
3638/// \p SelectOperand is the operand in binary operator \p MI that is the select
3639/// to fold.
3641 const unsigned &SelectOperand) {
3642 Register Dst = MI.getOperand(0).getReg();
3643 Register LHS = MI.getOperand(1).getReg();
3644 Register RHS = MI.getOperand(2).getReg();
3645 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3646
3647 Register SelectCond = Select->getOperand(1).getReg();
3648 Register SelectTrue = Select->getOperand(2).getReg();
3649 Register SelectFalse = Select->getOperand(3).getReg();
3650
3651 LLT Ty = MRI.getType(Dst);
3652 unsigned BinOpcode = MI.getOpcode();
3653
3654 Register FoldTrue, FoldFalse;
3655
3656 // We have a select-of-constants followed by a binary operator with a
3657 // constant. Eliminate the binop by pulling the constant math into the select.
3658 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3659 if (SelectOperand == 1) {
3660 // TODO: SelectionDAG verifies this actually constant folds before
3661 // committing to the combine.
3662
3663 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3664 FoldFalse =
3665 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3666 } else {
3667 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3668 FoldFalse =
3669 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3670 }
3671
3672 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3673 MI.eraseFromParent();
3674}
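// Illustrative sketch (simplified notation, hypothetical values): with a select
// of the constants 10 and 20 feeding a G_ADD of 5,
//   %s = G_SELECT %cond, 10, 20 ; %r = G_ADD %s, 5
// becomes
//   %t = G_ADD 10, 5 ; %f = G_ADD 20, 5 ; %r = G_SELECT %cond, %t, %f
// where the two new adds are expected to constant-fold to 15 and 25.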
3675
3676std::optional<SmallVector<Register, 8>>
3677CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3678 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3679 // We want to detect if Root is part of a tree which represents a bunch
3680 // of loads being merged into a larger load. We'll try to recognize patterns
3681 // like, for example:
3682 //
3683 // Reg Reg
3684 // \ /
3685 // OR_1 Reg
3686 // \ /
3687 // OR_2
3688 // \ Reg
3689 // .. /
3690 // Root
3691 //
3692 // Reg Reg Reg Reg
3693 // \ / \ /
3694 // OR_1 OR_2
3695 // \ /
3696 // \ /
3697 // ...
3698 // Root
3699 //
3700 // Each "Reg" may have been produced by a load + some arithmetic. This
3701 // function will save each of them.
3702 SmallVector<Register, 8> RegsToVisit;
3703 SmallVector<const MachineInstr *, 8> Ors = {Root};
3704
3705 // In the "worst" case, we're dealing with a load for each byte. So, there
3706 // are at most #bytes - 1 ORs.
3707 const unsigned MaxIter =
3708 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3709 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3710 if (Ors.empty())
3711 break;
3712 const MachineInstr *Curr = Ors.pop_back_val();
3713 Register OrLHS = Curr->getOperand(1).getReg();
3714 Register OrRHS = Curr->getOperand(2).getReg();
3715
3716 // In the combine, we want to eliminate the entire tree.
3717 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3718 return std::nullopt;
3719
3720 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3721 // something that may be a load + arithmetic.
3722 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3723 Ors.push_back(Or);
3724 else
3725 RegsToVisit.push_back(OrLHS);
3726 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3727 Ors.push_back(Or);
3728 else
3729 RegsToVisit.push_back(OrRHS);
3730 }
3731
3732 // We're going to try and merge each register into a wider power-of-2 type,
3733 // so we ought to have an even number of registers.
3734 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3735 return std::nullopt;
3736 return RegsToVisit;
3737}
3738
3739/// Helper function for findLoadOffsetsForLoadOrCombine.
3740///
3741/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3742/// and then moving that value into a specific byte offset.
3743///
3744/// e.g. x[i] << 24
3745///
3746/// \returns The load instruction and the byte offset it is moved into.
3747static std::optional<std::pair<GZExtLoad *, int64_t>>
3748matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3749 const MachineRegisterInfo &MRI) {
3750 assert(MRI.hasOneNonDBGUse(Reg) &&
3751 "Expected Reg to only have one non-debug use?");
3752 Register MaybeLoad;
3753 int64_t Shift;
3754 if (!mi_match(Reg, MRI,
3755 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3756 Shift = 0;
3757 MaybeLoad = Reg;
3758 }
3759
3760 if (Shift % MemSizeInBits != 0)
3761 return std::nullopt;
3762
3763 // TODO: Handle other types of loads.
3764 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3765 if (!Load)
3766 return std::nullopt;
3767
3768 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3769 return std::nullopt;
3770
3771 return std::make_pair(Load, Shift / MemSizeInBits);
3772}
3773
3774std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3775CombinerHelper::findLoadOffsetsForLoadOrCombine(
3777 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3778
3779 // Each load found for the pattern. There should be one for each RegsToVisit.
3780 SmallSetVector<GZExtLoad *, 8> Loads;
3781
3782 // The lowest index used in any load. (The lowest "i" for each x[i].)
3783 int64_t LowestIdx = INT64_MAX;
3784
3785 // The load which uses the lowest index.
3786 GZExtLoad *LowestIdxLoad = nullptr;
3787
3788 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3789 SmallSet<int64_t, 8> SeenIdx;
3790
3791 // Ensure each load is in the same MBB.
3792 // TODO: Support multiple MachineBasicBlocks.
3793 MachineBasicBlock *MBB = nullptr;
3794 const MachineMemOperand *MMO = nullptr;
3795
3796 // Earliest instruction-order load in the pattern.
3797 GZExtLoad *EarliestLoad = nullptr;
3798
3799 // Latest instruction-order load in the pattern.
3800 GZExtLoad *LatestLoad = nullptr;
3801
3802 // Base pointer which every load should share.
3803 Register BasePtr;
3804
3805 // We want to find a load for each register. Each load should have some
3806 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3807 // track of the load which uses the lowest index. Later, we will check if we
3808 // can use its pointer in the final, combined load.
3809 for (auto Reg : RegsToVisit) {
3810 // Find the load, and find the position that it will end up in the
3811 // (possibly shifted) value.
3812 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3813 if (!LoadAndPos)
3814 return std::nullopt;
3815 GZExtLoad *Load;
3816 int64_t DstPos;
3817 std::tie(Load, DstPos) = *LoadAndPos;
3818
3819 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3820 // it is difficult to check for stores/calls/etc between loads.
3821 MachineBasicBlock *LoadMBB = Load->getParent();
3822 if (!MBB)
3823 MBB = LoadMBB;
3824 if (LoadMBB != MBB)
3825 return std::nullopt;
3826
3827 // Make sure that the MachineMemOperands of every seen load are compatible.
3828 auto &LoadMMO = Load->getMMO();
3829 if (!MMO)
3830 MMO = &LoadMMO;
3831 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3832 return std::nullopt;
3833
3834 // Find out what the base pointer and index for the load is.
3835 Register LoadPtr;
3836 int64_t Idx;
3837 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3838 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3839 LoadPtr = Load->getOperand(1).getReg();
3840 Idx = 0;
3841 }
3842
3843 // Don't combine things like a[i], a[i] -> a bigger load.
3844 if (!SeenIdx.insert(Idx).second)
3845 return std::nullopt;
3846
3847 // Every load must share the same base pointer; don't combine things like:
3848 //
3849 // a[i], b[i + 1] -> a bigger load.
3850 if (!BasePtr.isValid())
3851 BasePtr = LoadPtr;
3852 if (BasePtr != LoadPtr)
3853 return std::nullopt;
3854
3855 if (Idx < LowestIdx) {
3856 LowestIdx = Idx;
3857 LowestIdxLoad = Load;
3858 }
3859
3860 // Keep track of the byte offset that this load ends up at. If we have seen
3861 // the byte offset, then stop here. We do not want to combine:
3862 //
3863 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3864 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3865 return std::nullopt;
3866 Loads.insert(Load);
3867
3868 // Keep track of the position of the earliest/latest loads in the pattern.
3869 // We will check that there are no load fold barriers between them later
3870 // on.
3871 //
3872 // FIXME: Is there a better way to check for load fold barriers?
3873 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3874 EarliestLoad = Load;
3875 if (!LatestLoad || dominates(*LatestLoad, *Load))
3876 LatestLoad = Load;
3877 }
3878
3879 // We found a load for each register. Let's check if each load satisfies the
3880 // pattern.
3881 assert(Loads.size() == RegsToVisit.size() &&
3882 "Expected to find a load for each register?");
3883 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3884 LatestLoad && "Expected at least two loads?");
3885
3886 // Check if there are any stores, calls, etc. between any of the loads. If
3887 // there are, then we can't safely perform the combine.
3888 //
3889 // MaxIter is chosen based off the (worst case) number of iterations it
3890 // typically takes to succeed in the LLVM test suite plus some padding.
3891 //
3892 // FIXME: Is there a better way to check for load fold barriers?
3893 const unsigned MaxIter = 20;
3894 unsigned Iter = 0;
3895 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3896 LatestLoad->getIterator())) {
3897 if (Loads.count(&MI))
3898 continue;
3899 if (MI.isLoadFoldBarrier())
3900 return std::nullopt;
3901 if (Iter++ == MaxIter)
3902 return std::nullopt;
3903 }
3904
3905 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3906}
3907
3909 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3910 assert(MI.getOpcode() == TargetOpcode::G_OR);
3911 MachineFunction &MF = *MI.getMF();
3912 // Assuming a little-endian target, transform:
3913 // s8 *a = ...
3914 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3915 // =>
3916 // s32 val = *((i32)a)
3917 //
3918 // s8 *a = ...
3919 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3920 // =>
3921 // s32 val = BSWAP(*((s32)a))
3922 Register Dst = MI.getOperand(0).getReg();
3923 LLT Ty = MRI.getType(Dst);
3924 if (Ty.isVector())
3925 return false;
3926
3927 // We need to combine at least two loads into this type. Since the smallest
3928 // possible load is into a byte, we need at least a 16-bit wide type.
3929 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3930 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3931 return false;
3932
3933 // Match a collection of non-OR instructions in the pattern.
3934 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3935 if (!RegsToVisit)
3936 return false;
3937
3938 // We have a collection of non-OR instructions. Figure out how wide each of
3939 // the small loads should be based off of the number of potential loads we
3940 // found.
3941 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3942 if (NarrowMemSizeInBits % 8 != 0)
3943 return false;
3944
3945 // Check if each register feeding into each OR is a load from the same
3946 // base pointer + some arithmetic.
3947 //
3948 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3949 //
3950 // Also verify that each of these ends up putting a[i] into the same memory
3951 // offset as a load into a wide type would.
3952 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
3953 GZExtLoad *LowestIdxLoad, *LatestLoad;
3954 int64_t LowestIdx;
3955 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3956 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3957 if (!MaybeLoadInfo)
3958 return false;
3959 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3960
3961 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3962 // we found before, check if this corresponds to a big or little endian byte
3963 // pattern. If it does, then we can represent it using a load + possibly a
3964 // BSWAP.
3965 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3966 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3967 if (!IsBigEndian)
3968 return false;
3969 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3970 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3971 return false;
3972
3973 // Make sure that the load from the lowest index produces offset 0 in the
3974 // final value.
3975 //
3976 // This ensures that we won't combine something like this:
3977 //
3978 // load x[i] -> byte 2
3979 // load x[i+1] -> byte 0 ---> wide_load x[i]
3980 // load x[i+2] -> byte 1
3981 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3982 const unsigned ZeroByteOffset =
3983 *IsBigEndian
3984 ? bigEndianByteAt(NumLoadsInTy, 0)
3985 : littleEndianByteAt(NumLoadsInTy, 0);
3986 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3987 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3988 ZeroOffsetIdx->second != LowestIdx)
3989 return false;
3990
3991 // We will reuse the pointer from the load which ends up at byte offset 0. It
3992 // may not use index 0.
3993 Register Ptr = LowestIdxLoad->getPointerReg();
3994 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3995 LegalityQuery::MemDesc MMDesc(MMO);
3996 MMDesc.MemoryTy = Ty;
3997 if (!isLegalOrBeforeLegalizer(
3998 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3999 return false;
4000 auto PtrInfo = MMO.getPointerInfo();
4001 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4002
4003 // Load must be allowed and fast on the target.
4004 LLVMContext &C = MF.getFunction().getContext();
4005 auto &DL = MF.getDataLayout();
4006 unsigned Fast = 0;
4007 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4008 !Fast)
4009 return false;
4010
4011 MatchInfo = [=](MachineIRBuilder &MIB) {
4012 MIB.setInstrAndDebugLoc(*LatestLoad);
4013 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4014 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4015 if (NeedsBSwap)
4016 MIB.buildBSwap(Dst, LoadDst);
4017 };
4018 return true;
4019}
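// Illustrative sketch (hypothetical registers, little-endian target): four
// one-byte zext loads of a, a+1, a+2, a+3, shifted into byte positions 0, 8,
// 16 and 24 and OR'd together, can become a single s32 G_LOAD of a, subject to
// the legality and fast-memory-access checks above; if the byte positions were
// reversed, the same load plus a G_BSWAP would be emitted instead.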
4020
4022 MachineInstr *&ExtMI) {
4023 auto &PHI = cast<GPhi>(MI);
4024 Register DstReg = PHI.getReg(0);
4025
4026 // TODO: Extending a vector may be expensive, don't do this until heuristics
4027 // are better.
4028 if (MRI.getType(DstReg).isVector())
4029 return false;
4030
4031 // Try to match a phi, whose only use is an extend.
4032 if (!MRI.hasOneNonDBGUse(DstReg))
4033 return false;
4034 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4035 switch (ExtMI->getOpcode()) {
4036 case TargetOpcode::G_ANYEXT:
4037 return true; // G_ANYEXT is usually free.
4038 case TargetOpcode::G_ZEXT:
4039 case TargetOpcode::G_SEXT:
4040 break;
4041 default:
4042 return false;
4043 }
4044
4045 // If the target is likely to fold this extend away, don't propagate.
4047 return false;
4048
4049 // We don't want to propagate the extends unless there's a good chance that
4050 // they'll be optimized in some way.
4051 // Collect the unique incoming values.
4053 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4054 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4055 switch (DefMI->getOpcode()) {
4056 case TargetOpcode::G_LOAD:
4057 case TargetOpcode::G_TRUNC:
4058 case TargetOpcode::G_SEXT:
4059 case TargetOpcode::G_ZEXT:
4060 case TargetOpcode::G_ANYEXT:
4061 case TargetOpcode::G_CONSTANT:
4062 InSrcs.insert(DefMI);
4063 // Don't try to propagate if there are too many places to create new
4064 // extends, chances are it'll increase code size.
4065 if (InSrcs.size() > 2)
4066 return false;
4067 break;
4068 default:
4069 return false;
4070 }
4071 }
4072 return true;
4073}
4074
4076 MachineInstr *&ExtMI) {
4077 auto &PHI = cast<GPhi>(MI);
4078 Register DstReg = ExtMI->getOperand(0).getReg();
4079 LLT ExtTy = MRI.getType(DstReg);
4080
4081 // Propagate the extension into each incoming reg's block.
4082 // Use a SetVector here because PHIs can have duplicate edges, and we want
4083 // deterministic iteration order.
4084 SmallSetVector<MachineInstr *, 8> SrcMIs;
4085 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
4086 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4087 auto SrcReg = PHI.getIncomingValue(I);
4088 auto *SrcMI = MRI.getVRegDef(SrcReg);
4089 if (!SrcMIs.insert(SrcMI))
4090 continue;
4091
4092 // Build an extend after each src inst.
4093 auto *MBB = SrcMI->getParent();
4094 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4095 if (InsertPt != MBB->end() && InsertPt->isPHI())
4096 InsertPt = MBB->getFirstNonPHI();
4097
4098 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4099 Builder.setDebugLoc(MI.getDebugLoc());
4100 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4101 OldToNewSrcMap[SrcMI] = NewExt;
4102 }
4103
4104 // Create a new phi with the extended inputs.
4106 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4107 NewPhi.addDef(DstReg);
4108 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4109 if (!MO.isReg()) {
4110 NewPhi.addMBB(MO.getMBB());
4111 continue;
4112 }
4113 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4114 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4115 }
4116 Builder.insertInstr(NewPhi);
4117 ExtMI->eraseFromParent();
4118}
4119
4121 Register &Reg) {
4122 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4123 // If we have a constant index, look for a G_BUILD_VECTOR source
4124 // and find the source register that the index maps to.
4125 Register SrcVec = MI.getOperand(1).getReg();
4126 LLT SrcTy = MRI.getType(SrcVec);
4127
4128 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4129 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4130 return false;
4131
4132 unsigned VecIdx = Cst->Value.getZExtValue();
4133
4134 // Check if we have a build_vector or build_vector_trunc with an optional
4135 // trunc in front.
4136 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4137 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4138 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4139 }
4140
4141 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4142 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4143 return false;
4144
4145 EVT Ty(getMVTForLLT(SrcTy));
4146 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4147 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4148 return false;
4149
4150 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4151 return true;
4152}
4153
4155 Register &Reg) {
4156 // Check the type of the register, since it may have come from a
4157 // G_BUILD_VECTOR_TRUNC.
4158 LLT ScalarTy = MRI.getType(Reg);
4159 Register DstReg = MI.getOperand(0).getReg();
4160 LLT DstTy = MRI.getType(DstReg);
4161
4162 if (ScalarTy != DstTy) {
4163 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4164 Builder.buildTrunc(DstReg, Reg);
4165 MI.eraseFromParent();
4166 return;
4167 }
4169}
4170
4173 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4174 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4175 // This combine tries to find build_vectors which have every source element
4176 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4177 // the masked load scalarization is run late in the pipeline. There's already
4178 // a combine for a similar pattern starting from the extract, but that
4179 // doesn't attempt to do it if there are multiple uses of the build_vector,
4180 // which in this case is true. Starting the combine from the build_vector
4181 // feels more natural than trying to find sibling nodes of extracts.
4182 // E.g.
4183 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4184 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4185 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4186 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4187 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4188 // ==>
4189 // replace ext{1,2,3,4} with %s{1,2,3,4}
4190
4191 Register DstReg = MI.getOperand(0).getReg();
4192 LLT DstTy = MRI.getType(DstReg);
4193 unsigned NumElts = DstTy.getNumElements();
4194
4195 SmallBitVector ExtractedElts(NumElts);
4196 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4197 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4198 return false;
4199 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4200 if (!Cst)
4201 return false;
4202 unsigned Idx = Cst->getZExtValue();
4203 if (Idx >= NumElts)
4204 return false; // Out of range.
4205 ExtractedElts.set(Idx);
4206 SrcDstPairs.emplace_back(
4207 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4208 }
4209 // Match if every element was extracted.
4210 return ExtractedElts.all();
4211}
4212
4215 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4216 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4217 for (auto &Pair : SrcDstPairs) {
4218 auto *ExtMI = Pair.second;
4219 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4220 ExtMI->eraseFromParent();
4221 }
4222 MI.eraseFromParent();
4223}
4224
4226 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4227 applyBuildFnNoErase(MI, MatchInfo);
4228 MI.eraseFromParent();
4229}
4230
4232 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4233 MatchInfo(Builder);
4234}
4235
4237 BuildFnTy &MatchInfo) {
4238 assert(MI.getOpcode() == TargetOpcode::G_OR);
4239
4240 Register Dst = MI.getOperand(0).getReg();
4241 LLT Ty = MRI.getType(Dst);
4242 unsigned BitWidth = Ty.getScalarSizeInBits();
4243
4244 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4245 unsigned FshOpc = 0;
4246
4247 // Match (or (shl ...), (lshr ...)).
4248 if (!mi_match(Dst, MRI,
4249 // m_GOr() handles the commuted version as well.
4250 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4251 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4252 return false;
4253
4254 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4255 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4256 int64_t CstShlAmt, CstLShrAmt;
4257 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4258 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4259 CstShlAmt + CstLShrAmt == BitWidth) {
4260 FshOpc = TargetOpcode::G_FSHR;
4261 Amt = LShrAmt;
4262
4263 } else if (mi_match(LShrAmt, MRI,
4264 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4265 ShlAmt == Amt) {
4266 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4267 FshOpc = TargetOpcode::G_FSHL;
4268
4269 } else if (mi_match(ShlAmt, MRI,
4270 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4271 LShrAmt == Amt) {
4272 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4273 FshOpc = TargetOpcode::G_FSHR;
4274
4275 } else {
4276 return false;
4277 }
4278
4279 LLT AmtTy = MRI.getType(Amt);
4280 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4281 return false;
4282
4283 MatchInfo = [=](MachineIRBuilder &B) {
4284 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4285 };
4286 return true;
4287}
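// Illustrative sketch: on s32, (x << 8) | (y >> 24) is a funnel shift,
//   G_FSHL x, y, 8   (equivalently G_FSHR x, y, 24, since 8 + 24 == 32).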
4288
4289/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4291 unsigned Opc = MI.getOpcode();
4292 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4293 Register X = MI.getOperand(1).getReg();
4294 Register Y = MI.getOperand(2).getReg();
4295 if (X != Y)
4296 return false;
4297 unsigned RotateOpc =
4298 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4299 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4300}
4301
4303 unsigned Opc = MI.getOpcode();
4304 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4305 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4307 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4308 : TargetOpcode::G_ROTR));
4309 MI.removeOperand(2);
4311}
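// Illustrative sketch: when both funnel-shift inputs are the same register,
// fshl(x, x, amt) == rotl(x, amt) and fshr(x, x, amt) == rotr(x, amt);
// e.g. on s8, fshl(0xAB, 0xAB, 4) and rotl(0xAB, 4) both produce 0xBA.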
4312
4313// Fold (rot x, c) -> (rot x, c % BitSize)
4315 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4316 MI.getOpcode() == TargetOpcode::G_ROTR);
4317 unsigned Bitsize =
4318 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4319 Register AmtReg = MI.getOperand(2).getReg();
4320 bool OutOfRange = false;
4321 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4322 if (auto *CI = dyn_cast<ConstantInt>(C))
4323 OutOfRange |= CI->getValue().uge(Bitsize);
4324 return true;
4325 };
4326 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4327}
4328
4330 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4331 MI.getOpcode() == TargetOpcode::G_ROTR);
4332 unsigned Bitsize =
4333 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4334 Register Amt = MI.getOperand(2).getReg();
4335 LLT AmtTy = MRI.getType(Amt);
4336 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4337 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4339 MI.getOperand(2).setReg(Amt);
4341}
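// Illustrative sketch: rotates are periodic in the bit width, so on s32
// rotl(x, 37) == rotl(x, 37 % 32) == rotl(x, 5); the G_UREM built above encodes
// that reduction and later constant-folds for constant amounts.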
4342
4344 int64_t &MatchInfo) {
4345 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4346 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4347
4348 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4349 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4350 // KnownBits on the LHS in two cases:
4351 //
4352 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4353 // we cannot do any transforms so we can safely bail out early.
4354 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4355 // >=0.
4356 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4357 if (KnownRHS.isUnknown())
4358 return false;
4359
4360 std::optional<bool> KnownVal;
4361 if (KnownRHS.isZero()) {
4362 // ? uge 0 -> always true
4363 // ? ult 0 -> always false
4364 if (Pred == CmpInst::ICMP_UGE)
4365 KnownVal = true;
4366 else if (Pred == CmpInst::ICMP_ULT)
4367 KnownVal = false;
4368 }
4369
4370 if (!KnownVal) {
4371 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4372 switch (Pred) {
4373 default:
4374 llvm_unreachable("Unexpected G_ICMP predicate?");
4375 case CmpInst::ICMP_EQ:
4376 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4377 break;
4378 case CmpInst::ICMP_NE:
4379 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4380 break;
4381 case CmpInst::ICMP_SGE:
4382 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4383 break;
4384 case CmpInst::ICMP_SGT:
4385 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4386 break;
4387 case CmpInst::ICMP_SLE:
4388 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4389 break;
4390 case CmpInst::ICMP_SLT:
4391 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4392 break;
4393 case CmpInst::ICMP_UGE:
4394 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4395 break;
4396 case CmpInst::ICMP_UGT:
4397 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4398 break;
4399 case CmpInst::ICMP_ULE:
4400 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4401 break;
4402 case CmpInst::ICMP_ULT:
4403 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4404 break;
4405 }
4406 }
4407
4408 if (!KnownVal)
4409 return false;
4410 MatchInfo =
4411 *KnownVal
4412 ? getICmpTrueVal(getTargetLowering(),
4413 /*IsVector = */
4414 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4415 /* IsFP = */ false)
4416 : 0;
4417 return true;
4418}
4419
4421 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4422 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4423 // Given:
4424 //
4425 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4426 // %cmp = G_ICMP ne %x, 0
4427 //
4428 // Or:
4429 //
4430 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4431 // %cmp = G_ICMP eq %x, 1
4432 //
4433 // We can replace %cmp with %x assuming true is 1 on the target.
4434 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4435 if (!CmpInst::isEquality(Pred))
4436 return false;
4437 Register Dst = MI.getOperand(0).getReg();
4438 LLT DstTy = MRI.getType(Dst);
4439 if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
4440 /* IsFP = */ false) != 1)
4441 return false;
4442 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4443 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4444 return false;
4445 Register LHS = MI.getOperand(2).getReg();
4446 auto KnownLHS = KB->getKnownBits(LHS);
4447 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4448 return false;
4449 // Make sure replacing Dst with the LHS is a legal operation.
4450 LLT LHSTy = MRI.getType(LHS);
4451 unsigned LHSSize = LHSTy.getSizeInBits();
4452 unsigned DstSize = DstTy.getSizeInBits();
4453 unsigned Op = TargetOpcode::COPY;
4454 if (DstSize != LHSSize)
4455 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4456 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4457 return false;
4458 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4459 return true;
4460}
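// Illustrative sketch (hypothetical registers): if %x is known to be 0 or 1,
// e.g. it is the zext of an s1, then on a target whose true value is 1 both
//   G_ICMP ne %x, 0   and   G_ICMP eq %x, 1
// are just %x, possibly with a trunc/zext to the compare's result type.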
4461
4462// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4464 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4465 assert(MI.getOpcode() == TargetOpcode::G_AND);
4466
4467 // Ignore vector types to simplify matching the two constants.
4468 // TODO: do this for vectors and scalars via a demanded bits analysis.
4469 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4470 if (Ty.isVector())
4471 return false;
4472
4473 Register Src;
4474 Register AndMaskReg;
4475 int64_t AndMaskBits;
4476 int64_t OrMaskBits;
4477 if (!mi_match(MI, MRI,
4478 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4479 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4480 return false;
4481
4482 // Check if OrMask could turn on any bits in Src.
4483 if (AndMaskBits & OrMaskBits)
4484 return false;
4485
4486 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4488 // Canonicalize the result to have the constant on the RHS.
4489 if (MI.getOperand(1).getReg() == AndMaskReg)
4490 MI.getOperand(2).setReg(AndMaskReg);
4491 MI.getOperand(1).setReg(Src);
4493 };
4494 return true;
4495}
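// Illustrative sketch: with disjoint masks the inner OR cannot set any bit that
// survives the outer AND, e.g. c1 = 0xFF00, c2 = 0x00FF:
//   ((x | 0xFF00) & 0x00FF) == (x & 0x00FF)   since 0xFF00 & 0x00FF == 0.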
4496
4497/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4499 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4500 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4501 Register Dst = MI.getOperand(0).getReg();
4502 Register Src = MI.getOperand(1).getReg();
4503 LLT Ty = MRI.getType(Src);
4505 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4506 return false;
4507 int64_t Width = MI.getOperand(2).getImm();
4508 Register ShiftSrc;
4509 int64_t ShiftImm;
4510 if (!mi_match(
4511 Src, MRI,
4512 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4513 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4514 return false;
4515 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4516 return false;
4517
4518 MatchInfo = [=](MachineIRBuilder &B) {
4519 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4520 auto Cst2 = B.buildConstant(ExtractTy, Width);
4521 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4522 };
4523 return true;
4524}
4525
4526/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4528 BuildFnTy &MatchInfo) {
4529 GAnd *And = cast<GAnd>(&MI);
4530 Register Dst = And->getReg(0);
4531 LLT Ty = MRI.getType(Dst);
4533 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4534 // into account.
4535 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4536 return false;
4537
4538 int64_t AndImm, LSBImm;
4539 Register ShiftSrc;
4540 const unsigned Size = Ty.getScalarSizeInBits();
4541 if (!mi_match(And->getReg(0), MRI,
4542 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4543 m_ICst(AndImm))))
4544 return false;
4545
4546 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4547 auto MaybeMask = static_cast<uint64_t>(AndImm);
4548 if (MaybeMask & (MaybeMask + 1))
4549 return false;
4550
4551 // LSB must fit within the register.
4552 if (static_cast<uint64_t>(LSBImm) >= Size)
4553 return false;
4554
4555 uint64_t Width = APInt(Size, AndImm).countr_one();
4556 MatchInfo = [=](MachineIRBuilder &B) {
4557 auto WidthCst = B.buildConstant(ExtractTy, Width);
4558 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4559 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4560 };
4561 return true;
4562}
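// Illustrative sketch: a right shift followed by a low-bit mask is a bitfield
// extract, e.g. (x >> 3) & 0xFF == G_UBFX x, lsb=3, width=8, where width is
// countr_one(0xFF) == 8; masks with holes such as 0xF0 are rejected by the
// imm & (imm + 1) check above.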
4563
4565 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4566 const unsigned Opcode = MI.getOpcode();
4567 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4568
4569 const Register Dst = MI.getOperand(0).getReg();
4570
4571 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4572 ? TargetOpcode::G_SBFX
4573 : TargetOpcode::G_UBFX;
4574
4575 // Check if the type we would use for the extract is legal
4576 LLT Ty = MRI.getType(Dst);
4578 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4579 return false;
4580
4581 Register ShlSrc;
4582 int64_t ShrAmt;
4583 int64_t ShlAmt;
4584 const unsigned Size = Ty.getScalarSizeInBits();
4585
4586 // Try to match shr (shl x, c1), c2
4587 if (!mi_match(Dst, MRI,
4588 m_BinOp(Opcode,
4589 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4590 m_ICst(ShrAmt))))
4591 return false;
4592
4593 // Make sure that the shift sizes can fit a bitfield extract
4594 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4595 return false;
4596
4597 // Skip this combine if the G_SEXT_INREG combine could handle it
4598 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4599 return false;
4600
4601 // Calculate start position and width of the extract
4602 const int64_t Pos = ShrAmt - ShlAmt;
4603 const int64_t Width = Size - ShrAmt;
4604
4605 MatchInfo = [=](MachineIRBuilder &B) {
4606 auto WidthCst = B.buildConstant(ExtractTy, Width);
4607 auto PosCst = B.buildConstant(ExtractTy, Pos);
4608 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4609 };
4610 return true;
4611}
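// Illustrative sketch: on s32, (x << 8) >> 20 (logical shift) keeps bits
// [12, 24) of x shifted down to bit 0, i.e. G_UBFX x, pos = 20 - 8 = 12,
// width = 32 - 20 = 12; the G_ASHR form becomes G_SBFX instead.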
4612
4614 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4615 const unsigned Opcode = MI.getOpcode();
4616 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4617
4618 const Register Dst = MI.getOperand(0).getReg();
4619 LLT Ty = MRI.getType(Dst);
4621 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4622 return false;
4623
4624 // Try to match shr (and x, c1), c2
4625 Register AndSrc;
4626 int64_t ShrAmt;
4627 int64_t SMask;
4628 if (!mi_match(Dst, MRI,
4629 m_BinOp(Opcode,
4630 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4631 m_ICst(ShrAmt))))
4632 return false;
4633
4634 const unsigned Size = Ty.getScalarSizeInBits();
4635 if (ShrAmt < 0 || ShrAmt >= Size)
4636 return false;
4637
4638 // If the shift subsumes the mask, emit the 0 directly.
4639 if (0 == (SMask >> ShrAmt)) {
4640 MatchInfo = [=](MachineIRBuilder &B) {
4641 B.buildConstant(Dst, 0);
4642 };
4643 return true;
4644 }
4645
4646 // Check that ubfx can do the extraction, with no holes in the mask.
4647 uint64_t UMask = SMask;
4648 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4649 UMask &= maskTrailingOnes<uint64_t>(Size);
4650 if (!isMask_64(UMask))
4651 return false;
4652
4653 // Calculate start position and width of the extract.
4654 const int64_t Pos = ShrAmt;
4655 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4656
4657 // It's preferable to keep the shift, rather than form G_SBFX.
4658 // TODO: remove the G_AND via demanded bits analysis.
4659 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4660 return false;
4661
4662 MatchInfo = [=](MachineIRBuilder &B) {
4663 auto WidthCst = B.buildConstant(ExtractTy, Width);
4664 auto PosCst = B.buildConstant(ExtractTy, Pos);
4665 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4666 };
4667 return true;
4668}
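// Illustrative sketch: (x & 0xFF0) >> 4 keeps bits [4, 12) of x, i.e.
// G_UBFX x, pos=4, width=8; the mask check above fills in the bits the shift
// discards (0xFF0 | 0xF == 0xFFF) and requires the result to be a contiguous
// low-bit mask.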
4669
4670bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4671 MachineInstr &MI) {
4672 auto &PtrAdd = cast<GPtrAdd>(MI);
4673
4674 Register Src1Reg = PtrAdd.getBaseReg();
4675 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4676 if (!Src1Def)
4677 return false;
4678
4679 Register Src2Reg = PtrAdd.getOffsetReg();
4680
4681 if (MRI.hasOneNonDBGUse(Src1Reg))
4682 return false;
4683
4684 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4685 if (!C1)
4686 return false;
4687 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4688 if (!C2)
4689 return false;
4690
4691 const APInt &C1APIntVal = *C1;
4692 const APInt &C2APIntVal = *C2;
4693 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4694
4695 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4696 // This combine may end up running before ptrtoint/inttoptr combines
4697 // manage to eliminate redundant conversions, so try to look through them.
4698 MachineInstr *ConvUseMI = &UseMI;
4699 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4700 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4701 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4702 Register DefReg = ConvUseMI->getOperand(0).getReg();
4703 if (!MRI.hasOneNonDBGUse(DefReg))
4704 break;
4705 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4706 ConvUseOpc = ConvUseMI->getOpcode();
4707 }
4708 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4709 if (!LdStMI)
4710 continue;
4711 // Is x[offset2] already not a legal addressing mode? If so then
4712 // reassociating the constants breaks nothing (we test offset2 because
4713 // that's the one we hope to fold into the load or store).
4714 TargetLoweringBase::AddrMode AM;
4715 AM.HasBaseReg = true;
4716 AM.BaseOffs = C2APIntVal.getSExtValue();
4717 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4718 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4719 PtrAdd.getMF()->getFunction().getContext());
4720 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4721 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4722 AccessTy, AS))
4723 continue;
4724
4725 // Would x[offset1+offset2] still be a legal addressing mode?
4726 AM.BaseOffs = CombinedValue;
4727 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4728 AccessTy, AS))
4729 return true;
4730 }
4731
4732 return false;
4733}
4734
4736 MachineInstr *RHS,
4737 BuildFnTy &MatchInfo) {
4738 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4739 Register Src1Reg = MI.getOperand(1).getReg();
4740 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4741 return false;
4742 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4743 if (!C2)
4744 return false;
4745
4746 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4747 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4748
4749 auto NewBase =
4750 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4752 MI.getOperand(1).setReg(NewBase.getReg(0));
4753 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4755 };
4756 return !reassociationCanBreakAddressingModePattern(MI);
4757}
4758
4760 MachineInstr *LHS,
4761 MachineInstr *RHS,
4762 BuildFnTy &MatchInfo) {
4763 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4764 // if and only if (G_PTR_ADD X, C) has one use.
4765 Register LHSBase;
4766 std::optional<ValueAndVReg> LHSCstOff;
4767 if (!mi_match(MI.getBaseReg(), MRI,
4768 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4769 return false;
4770
4771 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4772 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4773 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4774 // before its def. Sink the instruction to just before the outer PTR_ADD to
4775 // ensure this doesn't happen.
4776 LHSPtrAdd->moveBefore(&MI);
4777 Register RHSReg = MI.getOffsetReg();
4778 // Setting the VReg directly could give a type mismatch if it came from an extend/trunc, so build a fresh constant instead.
4779 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4781 MI.getOperand(2).setReg(NewCst.getReg(0));
4783 Observer.changingInstr(*LHSPtrAdd);
4784 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4785 Observer.changedInstr(*LHSPtrAdd);
4786 };
4787 return !reassociationCanBreakAddressingModePattern(MI);
4788}
4789
4791 MachineInstr *LHS,
4792 MachineInstr *RHS,
4793 BuildFnTy &MatchInfo) {
4794 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4795 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4796 if (!LHSPtrAdd)
4797 return false;
4798
4799 Register Src2Reg = MI.getOperand(2).getReg();
4800 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4801 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4802 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4803 if (!C1)
4804 return false;
4805 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4806 if (!C2)
4807 return false;
4808
4809 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4810 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4812 MI.getOperand(1).setReg(LHSSrc1);
4813 MI.getOperand(2).setReg(NewCst.getReg(0));
4815 };
4816 return !reassociationCanBreakAddressingModePattern(MI);
4817}
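// Illustrative sketch (simplified notation, hypothetical offsets):
//   %p1:_(p0) = G_PTR_ADD %base, 16
//   %p2:_(p0) = G_PTR_ADD %p1, 8
// folds to %p2 = G_PTR_ADD %base, 24, as long as the combined offset does not
// break a legal addressing mode for %p2's users (guarded by the
// !reassociationCanBreakAddressingModePattern check above).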
4818
4820 BuildFnTy &MatchInfo) {
4821 auto &PtrAdd = cast<GPtrAdd>(MI);
4822 // We're trying to match a few pointer computation patterns here for
4823 // re-association opportunities.
4824 // 1) Isolating a constant operand to be on the RHS, e.g.:
4825 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4826 //
4827 // 2) Folding two constants in each sub-tree as long as such folding
4828 // doesn't break a legal addressing mode.
4829 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4830 //
4831 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4832 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4833 // iff (G_PTR_ADD X, C) has one use.
4834 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4835 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4836
4837 // Try to match example 2.
4838 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4839 return true;
4840
4841 // Try to match example 3.
4842 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4843 return true;
4844
4845 // Try to match example 1.
4846 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4847 return true;
4848
4849 return false;
4850}
4852 Register OpLHS, Register OpRHS,
4853 BuildFnTy &MatchInfo) {
4854 LLT OpRHSTy = MRI.getType(OpRHS);
4855 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
4856
4857 if (OpLHSDef->getOpcode() != Opc)
4858 return false;
4859
4860 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
4861 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
4862 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
4863
4864 // If the inner op is (X op C), pull the constant out so it can be folded with
4865 // other constants in the expression tree. Folding is not guaranteed so we
4866 // might have (C1 op C2). In that case do not pull a constant out because it
4867 // won't help and can lead to infinite loops.
4870 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
4871 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
4872 MatchInfo = [=](MachineIRBuilder &B) {
4873 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
4874 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
4875 };
4876 return true;
4877 }
4878 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
4879 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
4880 // iff (op x, c1) has one use
4881 MatchInfo = [=](MachineIRBuilder &B) {
4882 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
4883 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
4884 };
4885 return true;
4886 }
4887 }
4888
4889 return false;
4890}
4891
4893 BuildFnTy &MatchInfo) {
4894 // We don't check if the reassociation will break a legal addressing mode
4895 // here since pointer arithmetic is handled by G_PTR_ADD.
4896 unsigned Opc = MI.getOpcode();
4897 Register DstReg = MI.getOperand(0).getReg();
4898 Register LHSReg = MI.getOperand(1).getReg();
4899 Register RHSReg = MI.getOperand(2).getReg();
4900
4901 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
4902 return true;
4903 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
4904 return true;
4905 return false;
4906}
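// Illustrative sketch: (add (add x, 4), 12) reassociates to
// (add x, (add 4, 12)), letting a later constant fold produce (add x, 16).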
4907
4909 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4910 Register SrcOp = MI.getOperand(1).getReg();
4911
4912 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
4913 MatchInfo = *MaybeCst;
4914 return true;
4915 }
4916
4917 return false;
4918}
4919
4921 Register Op1 = MI.getOperand(1).getReg();
4922 Register Op2 = MI.getOperand(2).getReg();
4923 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4924 if (!MaybeCst)
4925 return false;
4926 MatchInfo = *MaybeCst;
4927 return true;
4928}
4929
4931 Register Op1 = MI.getOperand(1).getReg();
4932 Register Op2 = MI.getOperand(2).getReg();
4933 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
4934 if (!MaybeCst)
4935 return false;
4936 MatchInfo =
4937 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
4938 return true;
4939}
4940
4942 ConstantFP *&MatchInfo) {
4943 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
4944 MI.getOpcode() == TargetOpcode::G_FMAD);
4945 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
4946
4947 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
4948 if (!Op3Cst)
4949 return false;
4950
4951 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
4952 if (!Op2Cst)
4953 return false;
4954
4955 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
4956 if (!Op1Cst)
4957 return false;
4958
4959 APFloat Op1F = Op1Cst->getValueAPF();
4960 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
4962 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
4963 return true;
4964}
4965
4967 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4968 // Look for a binop feeding into an AND with a mask:
4969 //
4970 // %add = G_ADD %lhs, %rhs
4971 // %and = G_AND %add, 000...11111111
4972 //
4973 // Check if it's possible to perform the binop at a narrower width and zext
4974 // back to the original width like so:
4975 //
4976 // %narrow_lhs = G_TRUNC %lhs
4977 // %narrow_rhs = G_TRUNC %rhs
4978 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4979 // %new_add = G_ZEXT %narrow_add
4980 // %and = G_AND %new_add, 000...11111111
4981 //
4982 // This can allow later combines to eliminate the G_AND if it turns out
4983 // that the mask is irrelevant.
4984 assert(MI.getOpcode() == TargetOpcode::G_AND);
4985 Register Dst = MI.getOperand(0).getReg();
4986 Register AndLHS = MI.getOperand(1).getReg();
4987 Register AndRHS = MI.getOperand(2).getReg();
4988 LLT WideTy = MRI.getType(Dst);
4989
4990 // If the potential binop has more than one use, then it's possible that one
4991 // of those uses will need its full width.
4992 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
4993 return false;
4994
4995 // Check if the LHS feeding the AND is impacted by the high bits that we're
4996 // masking out.
4997 //
4998 // e.g. for 64-bit x, y:
4999 //
5000 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5001 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5002 if (!LHSInst)
5003 return false;
5004 unsigned LHSOpc = LHSInst->getOpcode();
5005 switch (LHSOpc) {
5006 default:
5007 return false;
5008 case TargetOpcode::G_ADD:
5009 case TargetOpcode::G_SUB:
5010 case TargetOpcode::G_MUL:
5011 case TargetOpcode::G_AND:
5012 case TargetOpcode::G_OR:
5013 case TargetOpcode::G_XOR:
5014 break;
5015 }
5016
5017 // Find the mask on the RHS.
5018 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5019 if (!Cst)
5020 return false;
5021 auto Mask = Cst->Value;
5022 if (!Mask.isMask())
5023 return false;
5024
5025 // No point in combining if there's nothing to truncate.
5026 unsigned NarrowWidth = Mask.countr_one();
5027 if (NarrowWidth == WideTy.getSizeInBits())
5028 return false;
5029 LLT NarrowTy = LLT::scalar(NarrowWidth);
5030
5031 // Check if adding the zext + truncates could be harmful.
5032 auto &MF = *MI.getMF();
5033 const auto &TLI = getTargetLowering();
5034 LLVMContext &Ctx = MF.getFunction().getContext();
5035 auto &DL = MF.getDataLayout();
5036 if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
5037 !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
5038 return false;
5039 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5040 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5041 return false;
5042 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5043 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5044 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5045 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5046 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5047 auto NarrowBinOp =
5048 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5049 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5051 MI.getOperand(1).setReg(Ext.getReg(0));
5053 };
5054 return true;
5055}
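// Illustrative sketch: for 64-bit x, y masked to the low 16 bits,
//   (add_64(x, y) & 0xFFFF) == (zext(add_16(trunc(x), trunc(y))) & 0xFFFF)
// because carries only propagate upward, so the masked-out high bits never
// influence the low 16 bits of the result.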
5056
5058 unsigned Opc = MI.getOpcode();
5059 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5060
5061 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5062 return false;
5063
5064 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5066 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5067 : TargetOpcode::G_SADDO;
5068 MI.setDesc(Builder.getTII().get(NewOpc));
5069 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5071 };
5072 return true;
5073}
5074
5076 // (G_*MULO x, 0) -> 0 + no carry out
5077 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5078 MI.getOpcode() == TargetOpcode::G_SMULO);
5079 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5080 return false;
5081 Register Dst = MI.getOperand(0).getReg();
5082 Register Carry = MI.getOperand(1).getReg();
5085 return false;
5086 MatchInfo = [=](MachineIRBuilder &B) {
5087 B.buildConstant(Dst, 0);
5088 B.buildConstant(Carry, 0);
5089 };
5090 return true;
5091}
5092
5094 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5095 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5096 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5097 MI.getOpcode() == TargetOpcode::G_SADDE ||
5098 MI.getOpcode() == TargetOpcode::G_USUBE ||
5099 MI.getOpcode() == TargetOpcode::G_SSUBE);
5100 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5101 return false;
5102 MatchInfo = [&](MachineIRBuilder &B) {
5103 unsigned NewOpcode;
5104 switch (MI.getOpcode()) {
5105 case TargetOpcode::G_UADDE:
5106 NewOpcode = TargetOpcode::G_UADDO;
5107 break;
5108 case TargetOpcode::G_SADDE:
5109 NewOpcode = TargetOpcode::G_SADDO;
5110 break;
5111 case TargetOpcode::G_USUBE:
5112 NewOpcode = TargetOpcode::G_USUBO;
5113 break;
5114 case TargetOpcode::G_SSUBE:
5115 NewOpcode = TargetOpcode::G_SSUBO;
5116 break;
5117 }
5119 MI.setDesc(B.getTII().get(NewOpcode));
5120 MI.removeOperand(4);
5122 };
5123 return true;
5124}
5125
5127 BuildFnTy &MatchInfo) {
5128 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5129 Register Dst = MI.getOperand(0).getReg();
5130 // (x + y) - z -> x (if y == z)
5131 // (x + y) - z -> y (if x == z)
5132 Register X, Y, Z;
5133 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5134 Register ReplaceReg;
5135 int64_t CstX, CstY;
5136 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5137 mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
5138 ReplaceReg = X;
5139 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5140 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5141 ReplaceReg = Y;
5142 if (ReplaceReg) {
5143 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5144 return true;
5145 }
5146 }
5147
5148 // x - (y + z) -> 0 - y (if x == z)
5149 // x - (y + z) -> 0 - z (if x == y)
5150 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5151 Register ReplaceReg;
5152 int64_t CstX;
5153 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5154 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5155 ReplaceReg = Y;
5156 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5157 mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
5158 ReplaceReg = Z;
5159 if (ReplaceReg) {
5160 MatchInfo = [=](MachineIRBuilder &B) {
5161 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5162 B.buildSub(Dst, Zero, ReplaceReg);
5163 };
5164 return true;
5165 }
5166 }
5167 return false;
5168}
5169
5170MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
5171 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5172 auto &UDiv = cast<GenericMachineInstr>(MI);
5173 Register Dst = UDiv.getReg(0);
5174 Register LHS = UDiv.getReg(1);
5175 Register RHS = UDiv.getReg(2);
5176 LLT Ty = MRI.getType(Dst);
5177 LLT ScalarTy = Ty.getScalarType();
5178 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5179 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5180 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5181
5182 unsigned KnownLeadingZeros =
5183 KB ? KB->getKnownBits(LHS).countMinLeadingZeros() : 0;
5184 auto &MIB = Builder;
5185
5186 bool UseSRL = false;
5187 bool UseNPQ = false;
5188 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5189 SmallVector<Register, 16> Shifts, Factors;
5190 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5191 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5192
5193 auto BuildExactUDIVPattern = [&](const Constant *C) {
5194 // Don't recompute inverses for each splat element.
5195 if (IsSplat && !Factors.empty()) {
5196 Shifts.push_back(Shifts[0]);
5197 Factors.push_back(Factors[0]);
5198 return true;
5199 }
5200
5201 auto *CI = cast<ConstantInt>(C);
5202 APInt Divisor = CI->getValue();
5203 unsigned Shift = Divisor.countr_zero();
5204 if (Shift) {
5205 Divisor.lshrInPlace(Shift);
5206 UseSRL = true;
5207 }
5208
5209 // Calculate the multiplicative inverse modulo BW.
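// For example (illustrative, 32-bit): an exact udiv by 6 first shifts right by
// countr_zero(6) == 1 and then multiplies by the inverse of the remaining odd
// factor 3, which is 0xAAAAAAAB (3 * 0xAAAAAAAB == 1 mod 2^32), so
// x /u 6 == (x >> 1) * 0xAAAAAAAB whenever 6 divides x.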
5210 APInt Factor = Divisor.multiplicativeInverse();
5211 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5212 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5213 return true;
5214 };
5215
5216 auto BuildUDIVPattern = [&](const Constant *C) {
5217 auto *CI = cast<ConstantInt>(C);
5218 const APInt &Divisor = CI->getValue();
5219
5220 bool SelNPQ = false;
5221 APInt Magic(Divisor.getBitWidth(), 0);
5222 unsigned PreShift = 0, PostShift = 0;
5223
5224 // Magic algorithm doesn't work for division by 1. We need to emit a select
5225 // at the end.
5226 // TODO: Use undef values for divisor of 1.
5227 if (!Divisor.isOne()) {
5228
5229 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5230 // in the dividend exceed the leading zeros for the divisor.
5231 UnsignedDivisionByConstantInfo magics =
5232 UnsignedDivisionByConstantInfo::get(
5233 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5234
5235 Magic = std::move(magics.Magic);
5236
5237 assert(magics.PreShift < Divisor.getBitWidth() &&
5238 "We shouldn't generate an undefined shift!");
5239 assert(magics.PostShift < Divisor.getBitWidth() &&
5240 "We shouldn't generate an undefined shift!");
5241 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5242 PreShift = magics.PreShift;
5243 PostShift = magics.PostShift;
5244 SelNPQ = magics.IsAdd;
5245 }
5246
5247 PreShifts.push_back(
5248 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5249 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5250 NPQFactors.push_back(
5251 MIB.buildConstant(ScalarTy,
5252 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5253 : APInt::getZero(EltBits))
5254 .getReg(0));
5255 PostShifts.push_back(
5256 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5257 UseNPQ |= SelNPQ;
5258 return true;
5259 };
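// Illustrative s32 example: for a divisor of 3 (and no known leading zeros in
// the dividend) the standard magic is 0xAAAAAAAB with PreShift 0, PostShift 1
// and no NPQ fixup, so x /u 3 is computed as G_LSHR (G_UMULH %x, 0xAAAAAAAB), 1.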
5260
5261 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5262 // Collect all magic values from the build vector.
5263 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern);
5264 (void)Matched;
5265 assert(Matched && "Expected unary predicate match to succeed");
5266
5267 Register Shift, Factor;
5268 if (Ty.isVector()) {
5269 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5270 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5271 } else {
5272 Shift = Shifts[0];
5273 Factor = Factors[0];
5274 }
5275
5276 Register Res = LHS;
5277
5278 if (UseSRL)
5279 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5280
5281 return MIB.buildMul(Ty, Res, Factor);
5282 }
5283
5284 // Collect the shifts/magic values from each element.
5285 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5286 (void)Matched;
5287 assert(Matched && "Expected unary predicate match to succeed");
5288
5289 Register PreShift, PostShift, MagicFactor, NPQFactor;
5290 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5291 if (RHSDef) {
5292 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5293 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5294 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5295 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5296 } else {
5297 assert(MRI.getType(RHS).isScalar() &&
5298 "Non-build_vector operation should have been a scalar");
5299 PreShift = PreShifts[0];
5300 MagicFactor = MagicFactors[0];
5301 PostShift = PostShifts[0];
5302 }
5303
5304 Register Q = LHS;
5305 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5306
5307 // Multiply the numerator (operand 0) by the magic value.
5308 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5309
5310 if (UseNPQ) {
5311 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5312
5313 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5314 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
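// Roughly: the NPQ fixup handles magics.IsAdd, where the real magic factor
// needs EltBits + 1 bits. With q = umulh(x, Magic), q + ((x - q) >> 1) equals
// floor((x + q) / 2), recovering the missing top bit of the product before the
// final PostShift.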
5315 if (Ty.isVector())
5316 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5317 else
5318 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5319
5320 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5321 }
5322
5323 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5324 auto One = MIB.buildConstant(Ty, 1);
5325 auto IsOne = MIB.buildICmp(
5326 CmpInst::Predicate::ICMP_EQ,
5327 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5328 return MIB.buildSelect(Ty, IsOne, LHS, Q);
5329}
5330
5332 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5333 Register Dst = MI.getOperand(0).getReg();
5334 Register RHS = MI.getOperand(2).getReg();
5335 LLT DstTy = MRI.getType(Dst);
5336
5337 auto &MF = *MI.getMF();
5338 AttributeList Attr = MF.getFunction().getAttributes();
5339 const auto &TLI = getTargetLowering();
5340 LLVMContext &Ctx = MF.getFunction().getContext();
5341 auto &DL = MF.getDataLayout();
5342 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5343 return false;
5344
5345 // Don't do this for minsize because the instruction sequence is usually
5346 // larger.
5347 if (MF.getFunction().hasMinSize())
5348 return false;
5349
5350 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5351 return matchUnaryPredicate(
5352 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5353 }
5354
5355 auto *RHSDef = MRI.getVRegDef(RHS);
5356 if (!isConstantOrConstantVector(*RHSDef, MRI))
5357 return false;
5358
5359 // Don't do this if the types are not going to be legal.
5360 if (LI) {
5361 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5362 return false;
5363 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5364 return false;
5365 if (!isLegalOrBeforeLegalizer(
5366 {TargetOpcode::G_ICMP,
5367 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5368 DstTy}}))
5369 return false;
5370 }
5371
5372 return matchUnaryPredicate(
5373 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5374}
5375
5376void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
5377 auto *NewMI = buildUDivUsingMul(MI);
5378 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5379}
5380
5381bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
5382 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5383 Register Dst = MI.getOperand(0).getReg();
5384 Register RHS = MI.getOperand(2).getReg();
5385 LLT DstTy = MRI.getType(Dst);
5386
5387 auto &MF = *MI.getMF();
5388 AttributeList Attr = MF.getFunction().getAttributes();
5389 const auto &TLI = getTargetLowering();
5390 LLVMContext &Ctx = MF.getFunction().getContext();
5391 auto &DL = MF.getDataLayout();
5392 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5393 return false;
5394
5395 // Don't do this for minsize because the instruction sequence is usually
5396 // larger.
5397 if (MF.getFunction().hasMinSize())
5398 return false;
5399
5400 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5401 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5402 return matchUnaryPredicate(
5403 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5404 }
5405
5406 // Don't support the general case for now.
5407 return false;
5408}
5409
5410void CombinerHelper::applySDivByConst(MachineInstr &MI) {
5411 auto *NewMI = buildSDivUsingMul(MI);
5412 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5413}
5414
5415MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
5416 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5417 auto &SDiv = cast<GenericMachineInstr>(MI);
5418 Register Dst = SDiv.getReg(0);
5419 Register LHS = SDiv.getReg(1);
5420 Register RHS = SDiv.getReg(2);
5421 LLT Ty = MRI.getType(Dst);
5422 LLT ScalarTy = Ty.getScalarType();
5423 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5424 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5425 auto &MIB = Builder;
5426
5427 bool UseSRA = false;
5428 SmallVector<Register, 16> Shifts, Factors;
5429
5430 auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5431 bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
5432
5433 auto BuildSDIVPattern = [&](const Constant *C) {
5434 // Don't recompute inverses for each splat element.
5435 if (IsSplat && !Factors.empty()) {
5436 Shifts.push_back(Shifts[0]);
5437 Factors.push_back(Factors[0]);
5438 return true;
5439 }
5440
5441 auto *CI = cast<ConstantInt>(C);
5442 APInt Divisor = CI->getValue();
5443 unsigned Shift = Divisor.countr_zero();
5444 if (Shift) {
5445 Divisor.ashrInPlace(Shift);
5446 UseSRA = true;
5447 }
5448
5449 // Calculate the multiplicative inverse modulo BW.
5450 // The divisor is odd after the shift above, so the inverse exists.
5451 APInt Factor = Divisor.multiplicativeInverse();
5452 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5453 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5454 return true;
5455 };
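// Because this lowering is only used for 'exact' divisions, the product with
// the modular inverse in the 2^BW ring is the true signed quotient; no
// magic-number fixups are needed.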
5456
5457 // Collect all magic values from the build vector.
5458 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5459 (void)Matched;
5460 assert(Matched && "Expected unary predicate match to succeed");
5461
5462 Register Shift, Factor;
5463 if (Ty.isVector()) {
5464 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5465 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5466 } else {
5467 Shift = Shifts[0];
5468 Factor = Factors[0];
5469 }
5470
5471 Register Res = LHS;
5472
5473 if (UseSRA)
5474 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5475
5476 return MIB.buildMul(Ty, Res, Factor);
5477}
5478
5479bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) {
5480 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5481 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5482 "Expected SDIV or UDIV");
5483 auto &Div = cast<GenericMachineInstr>(MI);
5484 Register RHS = Div.getReg(2);
5485 auto MatchPow2 = [&](const Constant *C) {
5486 auto *CI = dyn_cast<ConstantInt>(C);
5487 return CI && (CI->getValue().isPowerOf2() ||
5488 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5489 };
5490 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5491}
5492
5493void CombinerHelper::applySDivByPow2(MachineInstr &MI) {
5494 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5495 auto &SDiv = cast<GenericMachineInstr>(MI);
5496 Register Dst = SDiv.getReg(0);
5497 Register LHS = SDiv.getReg(1);
5498 Register RHS = SDiv.getReg(2);
5499 LLT Ty = MRI.getType(Dst);
5500 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5501 LLT CCVT =
5502 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5503
5504 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5505 // to the following version:
5506 //
5507 // %c1 = G_CTTZ %rhs
5508 // %inexact = G_SUB $bitwidth, %c1
5509 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5510 // %lshr = G_LSHR %sign, %inexact
5511 // %add = G_ADD %lhs, %lshr
5512 // %ashr = G_ASHR %add, %c1
5513 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5514 // %zero = G_CONSTANT $0
5515 // %neg = G_NEG %ashr
5516 // %isneg = G_ICMP SLT %rhs, %zero
5517 // %res = G_SELECT %isneg, %neg, %ashr
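//
// For example (illustrative), an s32 G_SDIV %x, 8 becomes roughly:
//   %sign = G_ASHR %x, 31         ; 0 or -1
//   %lshr = G_LSHR %sign, 29      ; 0 or 7, i.e. |divisor| - 1
//   %add  = G_ADD %x, %lshr       ; bias negative dividends toward zero
//   %res  = G_ASHR %add, 3
// plus the selects above that handle divisors of 1, -1 and negative values.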
5518
5519 unsigned BitWidth = Ty.getScalarSizeInBits();
5520 auto Zero = Builder.buildConstant(Ty, 0);
5521
5522 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5523 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5524 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5525 // Splat the sign bit into the register
5526 auto Sign = Builder.buildAShr(
5527 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5528
5529 // Add (LHS < 0) ? abs2 - 1 : 0;
5530 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5531 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5532 auto AShr = Builder.buildAShr(Ty, Add, C1);
5533
5534 // Special case: (sdiv X, 1) -> X
5535 // Special Case: (sdiv X, -1) -> 0-X
5536 auto One = Builder.buildConstant(Ty, 1);
5537 auto MinusOne = Builder.buildConstant(Ty, -1);
5538 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5539 auto IsMinusOne =
5540 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5541 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5542 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5543
5544 // If divided by a positive value, we're done. Otherwise, the result must be
5545 // negated.
5546 auto Neg = Builder.buildNeg(Ty, AShr);
5547 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5548 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5549 MI.eraseFromParent();
5550}
5551
5552void CombinerHelper::applyUDivByPow2(MachineInstr &MI) {
5553 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5554 auto &UDiv = cast<GenericMachineInstr>(MI);
5555 Register Dst = UDiv.getReg(0);
5556 Register LHS = UDiv.getReg(1);
5557 Register RHS = UDiv.getReg(2);
5558 LLT Ty = MRI.getType(Dst);
5559 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5560
5561 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5562 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5563 MI.eraseFromParent();
5564}
5565
5566bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
5567 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5568 Register RHS = MI.getOperand(2).getReg();
5569 Register Dst = MI.getOperand(0).getReg();
5570 LLT Ty = MRI.getType(Dst);
5571 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5572 auto MatchPow2ExceptOne = [&](const Constant *C) {
5573 if (auto *CI = dyn_cast<ConstantInt>(C))
5574 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5575 return false;
5576 };
5577 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5578 return false;
5579 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
5580}
5581
5582void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
5583 Register LHS = MI.getOperand(1).getReg();
5584 Register RHS = MI.getOperand(2).getReg();
5585 Register Dst = MI.getOperand(0).getReg();
5586 LLT Ty = MRI.getType(Dst);
5587 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5588 unsigned NumEltBits = Ty.getScalarSizeInBits();
5589
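// G_UMULH %x, 2^k returns the high half of %x << k, i.e. %x >> (bits - k).
// E.g. for s32, G_UMULH %x, 16 is equivalent to G_LSHR %x, 28.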
5590 auto LogBase2 = buildLogBase2(RHS, Builder);
5591 auto ShiftAmt =
5592 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5593 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5594 Builder.buildLShr(Dst, LHS, Trunc);
5595 MI.eraseFromParent();
5596}
5597
5598bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
5599 BuildFnTy &MatchInfo) {
5600 unsigned Opc = MI.getOpcode();
5601 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
5602 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5603 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
5604
5605 Register Dst = MI.getOperand(0).getReg();
5606 Register X = MI.getOperand(1).getReg();
5607 Register Y = MI.getOperand(2).getReg();
5608 LLT Type = MRI.getType(Dst);
5609
5610 // fold (fadd x, fneg(y)) -> (fsub x, y)
5611 // fold (fadd fneg(y), x) -> (fsub x, y)
5612 // G_FADD is commutative, so both cases are checked by m_GFAdd
5613 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5614 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
5615 Opc = TargetOpcode::G_FSUB;
5616 }
5617 /// fold (fsub x, fneg(y)) -> (fadd x, y)
5618 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5619 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
5620 Opc = TargetOpcode::G_FADD;
5621 }
5622 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
5623 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
5624 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
5625 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
5626 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5627 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
5628 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
5629 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
5630 // no opcode change
5631 } else
5632 return false;
5633
5634 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5635 Observer.changingInstr(MI);
5636 MI.setDesc(B.getTII().get(Opc));
5637 MI.getOperand(1).setReg(X);
5638 MI.getOperand(2).setReg(Y);
5639 Observer.changedInstr(MI);
5640 };
5641 return true;
5642}
5643
5644bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5645 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5646
5647 Register LHS = MI.getOperand(1).getReg();
5648 MatchInfo = MI.getOperand(2).getReg();
5649 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
5650
5651 const auto LHSCst = Ty.isVector()
5652 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
5653 : getFConstantVRegValWithLookThrough(LHS, MRI);
5654 if (!LHSCst)
5655 return false;
5656
5657 // -0.0 is always allowed
5658 if (LHSCst->Value.isNegZero())
5659 return true;
5660
5661 // +0.0 is only allowed if nsz is set.
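// (Without nsz the fold is unsound: G_FSUB +0.0, +0.0 yields +0.0, while
// G_FNEG +0.0 yields -0.0.)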
5662 if (LHSCst->Value.isPosZero())
5663 return MI.getFlag(MachineInstr::FmNsz);
5664
5665 return false;
5666}
5667
5668void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5669 Register Dst = MI.getOperand(0).getReg();
5670 Builder.buildFNeg(
5671 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
5672 eraseInst(MI);
5673}
5674
5675/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
5676/// due to global flags or MachineInstr flags.
5677static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
5678 if (MI.getOpcode() != TargetOpcode::G_FMUL)
5679 return false;
5680 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
5681}
5682
5683static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
5684 const MachineRegisterInfo &MRI) {
5685 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
5686 MRI.use_instr_nodbg_end()) >
5687 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
5688 MRI.use_instr_nodbg_end());
5689}
5690
5691bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
5692 bool &AllowFusionGlobally,
5693 bool &HasFMAD, bool &Aggressive,
5694 bool CanReassociate) {
5695
5696 auto *MF = MI.getMF();
5697 const auto &TLI = *MF->getSubtarget().getTargetLowering();
5698 const TargetOptions &Options = MF->getTarget().Options;
5699 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5700
5701 if (CanReassociate &&
5702 !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
5703 return false;
5704
5705 // Floating-point multiply-add with intermediate rounding.
5706 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
5707 // Floating-point multiply-add without intermediate rounding.
5708 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
5709 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
5710 // No valid opcode, do not combine.
5711 if (!HasFMAD && !HasFMA)
5712 return false;
5713
5714 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
5715 Options.UnsafeFPMath || HasFMAD;
5716 // If the addition is not contractable, do not combine.
5717 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
5718 return false;
5719
5720 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
5721 return true;
5722}
5723
5724bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
5725 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5726 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5727
5728 bool AllowFusionGlobally, HasFMAD, Aggressive;
5729 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5730 return false;
5731
5732 Register Op1 = MI.getOperand(1).getReg();
5733 Register Op2 = MI.getOperand(2).getReg();
5734 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5735 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5736 unsigned PreferredFusedOpcode =
5737 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5738
5739 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5740 // prefer to fold the multiply with fewer uses.
5741 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5742 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5743 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5744 std::swap(LHS, RHS);
5745 }
5746
5747 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
5748 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5749 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
5750 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5751 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5752 {LHS.MI->getOperand(1).getReg(),
5753 LHS.MI->getOperand(2).getReg(), RHS.Reg});
5754 };
5755 return true;
5756 }
5757
5758 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
5759 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5760 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
5761 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5762 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5763 {RHS.MI->getOperand(1).getReg(),
5764 RHS.MI->getOperand(2).getReg(), LHS.Reg});
5765 };
5766 return true;
5767 }
5768
5769 return false;
5770}
5771
5772bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
5773 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5774 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5775
5776 bool AllowFusionGlobally, HasFMAD, Aggressive;
5777 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5778 return false;
5779
5780 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5781 Register Op1 = MI.getOperand(1).getReg();
5782 Register Op2 = MI.getOperand(2).getReg();
5783 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5784 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5785 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5786
5787 unsigned PreferredFusedOpcode =
5788 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5789
5790 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5791 // prefer to fold the multiply with fewer uses.
5792 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5793 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5794 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5795 std::swap(LHS, RHS);
5796 }
5797
5798 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
5799 MachineInstr *FpExtSrc;
5800 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5801 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5802 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5803 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5804 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5805 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5806 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5807 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5808 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
5809 };
5810 return true;
5811 }
5812
5813 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
5814 // Note: Commutes FADD operands.
5815 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5816 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5817 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5818 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5819 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5820 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5821 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5822 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5823 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
5824 };
5825 return true;
5826 }
5827
5828 return false;
5829}
5830
5831bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
5832 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5833 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5834
5835 bool AllowFusionGlobally, HasFMAD, Aggressive;
5836 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
5837 return false;
5838
5839 Register Op1 = MI.getOperand(1).getReg();
5840 Register Op2 = MI.getOperand(2).getReg();
5841 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5842 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5843 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5844
5845 unsigned PreferredFusedOpcode =
5846 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5847
5848 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5849 // prefer to fold the multiply with fewer uses.
5850 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5851 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5852 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5853 std::swap(LHS, RHS);
5854 }
5855
5856 MachineInstr *FMA = nullptr;
5857 Register Z;
5858 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
5859 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5860 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
5861 TargetOpcode::G_FMUL) &&
5862 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
5863 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
5864 FMA = LHS.MI;
5865 Z = RHS.Reg;
5866 }
5867 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
5868 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5869 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
5870 TargetOpcode::G_FMUL) &&
5871 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
5872 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
5873 Z = LHS.Reg;
5874 FMA = RHS.MI;
5875 }
5876
5877 if (FMA) {
5878 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
5879 Register X = FMA->getOperand(1).getReg();
5880 Register Y = FMA->getOperand(2).getReg();
5881 Register U = FMulMI->getOperand(1).getReg();
5882 Register V = FMulMI->getOperand(2).getReg();
5883
5884 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5885 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
5886 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
5887 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5888 {X, Y, InnerFMA});
5889 };
5890 return true;
5891 }
5892
5893 return false;
5894}
5895
5896bool CombinerHelper::matchCombineFAddFpExtFMAFMulToFMadOrFMAAggressive(
5897 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5898 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5899
5900 bool AllowFusionGlobally, HasFMAD, Aggressive;
5901 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5902 return false;
5903
5904 if (!Aggressive)
5905 return false;
5906
5907 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5908 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5909 Register Op1 = MI.getOperand(1).getReg();
5910 Register Op2 = MI.getOperand(2).getReg();
5911 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5912 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5913
5914 unsigned PreferredFusedOpcode =
5915 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5916
5917 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5918 // prefer to fold the multiply with fewer uses.
5919 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5920 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5921 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5922 std::swap(LHS, RHS);
5923 }
5924
5925 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
5926 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
5927 Register Y, MachineIRBuilder &B) {
5928 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
5929 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
5930 Register InnerFMA =
5931 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
5932 .getReg(0);
5933 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5934 {X, Y, InnerFMA});
5935 };
5936
5937 MachineInstr *FMulMI, *FMAMI;
5938 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
5939 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5940 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5941 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
5942 m_GFPExt(m_MInstr(FMulMI))) &&
5943 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5944 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5945 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5946 MatchInfo = [=](MachineIRBuilder &B) {
5947 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5948 FMulMI->getOperand(2).getReg(), RHS.Reg,
5949 LHS.MI->getOperand(1).getReg(),
5950 LHS.MI->getOperand(2).getReg(), B);
5951 };
5952 return true;
5953 }
5954
5955 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
5956 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5957 // FIXME: This turns two single-precision and one double-precision
5958 // operation into two double-precision operations, which might not be
5959 // interesting for all targets, especially GPUs.
5960 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5961 FMAMI->getOpcode() == PreferredFusedOpcode) {
5962 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5963 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5964 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5965 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5966 MatchInfo = [=](MachineIRBuilder &B) {
5967 Register X = FMAMI->getOperand(1).getReg();
5968 Register Y = FMAMI->getOperand(2).getReg();
5969 X = B.buildFPExt(DstType, X).getReg(0);
5970 Y = B.buildFPExt(DstType, Y).getReg(0);
5971 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5972 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
5973 };
5974
5975 return true;
5976 }
5977 }
5978
5979 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
5980 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5981 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5982 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
5983 m_GFPExt(m_MInstr(FMulMI))) &&
5984 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5985 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5986 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5987 MatchInfo = [=](MachineIRBuilder &B) {
5988 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5989 FMulMI->getOperand(2).getReg(), LHS.Reg,
5990 RHS.MI->getOperand(1).getReg(),
5991 RHS.MI->getOperand(2).getReg(), B);
5992 };
5993 return true;
5994 }
5995
5996 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
5997 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5998 // FIXME: This turns two single-precision and one double-precision
5999 // operation into two double-precision operations, which might not be
6000 // interesting for all targets, especially GPUs.
6001 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6002 FMAMI->getOpcode() == PreferredFusedOpcode) {
6003 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6004 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6005 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6006 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6007 MatchInfo = [=](MachineIRBuilder &B) {
6008 Register X = FMAMI->getOperand(1).getReg();
6009 Register Y = FMAMI->getOperand(2).getReg();
6010 X = B.buildFPExt(DstType, X).getReg(0);
6011 Y = B.buildFPExt(DstType, Y).getReg(0);
6012 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6013 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6014 };
6015 return true;
6016 }
6017 }
6018
6019 return false;
6020}
6021
6022bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
6023 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6024 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6025
6026 bool AllowFusionGlobally, HasFMAD, Aggressive;
6027 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6028 return false;
6029
6030 Register Op1 = MI.getOperand(1).getReg();
6031 Register Op2 = MI.getOperand(2).getReg();
6032 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6033 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6034 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6035
6036 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6037 // prefer to fold the multiply with fewer uses.
6038 bool FirstMulHasFewerUses = true;
6039 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6040 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6041 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6042 FirstMulHasFewerUses = false;
6043
6044 unsigned PreferredFusedOpcode =
6045 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6046
6047 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6048 if (FirstMulHasFewerUses &&
6049 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6050 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6051 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6052 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6053 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6054 {LHS.MI->getOperand(1).getReg(),
6055 LHS.MI->getOperand(2).getReg(), NegZ});
6056 };
6057 return true;
6058 }
6059 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6060 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6061 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6062 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6063 Register NegY =
6064 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6065 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6066 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6067 };
6068 return true;
6069 }
6070
6071 return false;
6072}
6073
6074bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
6075 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6076 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6077
6078 bool AllowFusionGlobally, HasFMAD, Aggressive;
6079 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6080 return false;
6081
6082 Register LHSReg = MI.getOperand(1).getReg();
6083 Register RHSReg = MI.getOperand(2).getReg();
6084 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6085
6086 unsigned PreferredFusedOpcode =
6087 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6088
6089 MachineInstr *FMulMI;
6090 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6091 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6092 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6093 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6094 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6095 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6096 Register NegX =
6097 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6098 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6099 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6100 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6101 };
6102 return true;
6103 }
6104
6105 // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
6106 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6107 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6108 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6109 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6110 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6111 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6112 {FMulMI->getOperand(1).getReg(),
6113 FMulMI->getOperand(2).getReg(), LHSReg});
6114 };
6115 return true;
6116 }
6117
6118 return false;
6119}
6120
6121bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
6122 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6123 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6124
6125 bool AllowFusionGlobally, HasFMAD, Aggressive;
6126 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6127 return false;
6128
6129 Register LHSReg = MI.getOperand(1).getReg();
6130 Register RHSReg = MI.getOperand(2).getReg();
6131 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6132
6133 unsigned PreferredFusedOpcode =
6134 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6135
6136 MachineInstr *FMulMI;
6137 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6138 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6139 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6140 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6141 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6142 Register FpExtX =
6143 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6144 Register FpExtY =
6145 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6146 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6147 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6148 {FpExtX, FpExtY, NegZ});
6149 };
6150 return true;
6151 }
6152
6153 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6154 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6155 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6156 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6157 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6158 Register FpExtY =
6159 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6160 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6161 Register FpExtZ =
6162 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6163 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6164 {NegY, FpExtZ, LHSReg});
6165 };
6166 return true;
6167 }
6168
6169 return false;
6170}
6171
6172bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
6173 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6174 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6175
6176 bool AllowFusionGlobally, HasFMAD, Aggressive;
6177 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6178 return false;
6179
6180 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6181 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6182 Register LHSReg = MI.getOperand(1).getReg();
6183 Register RHSReg = MI.getOperand(2).getReg();
6184
6185 unsigned PreferredFusedOpcode =
6186 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6187
6188 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6189 MachineIRBuilder &B) {
6190 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6191 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6192 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6193 };
6194
6195 MachineInstr *FMulMI;
6196 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6197 // (fneg (fma (fpext x), (fpext y), z))
6198 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6199 // (fneg (fma (fpext x), (fpext y), z))
6200 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6201 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6202 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6203 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6204 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6205 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6206 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6207 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6208 FMulMI->getOperand(2).getReg(), RHSReg, B);
6209 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6210 };
6211 return true;
6212 }
6213
6214 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6215 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6216 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6217 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6218 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6219 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6220 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6221 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6222 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6223 FMulMI->getOperand(2).getReg(), LHSReg, B);
6224 };
6225 return true;
6226 }
6227
6228 return false;
6229}
6230
6231bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
6232 unsigned &IdxToPropagate) {
6233 bool PropagateNaN;
6234 switch (MI.getOpcode()) {
6235 default:
6236 return false;
6237 case TargetOpcode::G_FMINNUM:
6238 case TargetOpcode::G_FMAXNUM:
6239 PropagateNaN = false;
6240 break;
6241 case TargetOpcode::G_FMINIMUM:
6242 case TargetOpcode::G_FMAXIMUM:
6243 PropagateNaN = true;
6244 break;
6245 }
6246
6247 auto MatchNaN = [&](unsigned Idx) {
6248 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6249 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6250 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6251 return false;
6252 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6253 return true;
6254 };
6255
6256 return MatchNaN(1) || MatchNaN(2);
6257}
6258
6259bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
6260 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6261 Register LHS = MI.getOperand(1).getReg();
6262 Register RHS = MI.getOperand(2).getReg();
6263
6264 // Helper lambda to check for opportunities for
6265 // A + (B - A) -> B
6266 // (B - A) + A -> B
6267 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6268 Register Reg;
6269 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6270 Reg == MaybeSameReg;
6271 };
6272 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6273}
6274
6275bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
6276 Register &MatchInfo) {
6277 // This combine folds the following patterns:
6278 //
6279 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6280 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6281 // into
6282 // x
6283 // if
6284 // k == sizeof(VecEltTy)/2
6285 // type(x) == type(dst)
6286 //
6287 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6288 // into
6289 // x
6290 // if
6291 // type(x) == type(dst)
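//
// E.g. (illustrative) with x:<2 x s16> and dst:<2 x s16>:
//   %cast:_(s32) = G_BITCAST %x
//   %hi:_(s32) = G_LSHR %cast, 16
//   %dst:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %cast, %hi
// simply reassembles %x, so %dst can be replaced by %x.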
6292
6293 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6294 LLT DstEltTy = DstVecTy.getElementType();
6295
6296 Register Lo, Hi;
6297
6298 if (mi_match(
6299 MI, MRI,
6300 m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
6301 MatchInfo = Lo;
6302 return MRI.getType(MatchInfo) == DstVecTy;
6303 }
6304
6305 std::optional<ValueAndVReg> ShiftAmount;
6306 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6307 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6308 if (mi_match(
6309 MI, MRI,
6310 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6311 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6312 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6313 MatchInfo = Lo;
6314 return MRI.getType(MatchInfo) == DstVecTy;
6315 }
6316 }
6317
6318 return false;
6319}
6320
6321bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
6322 Register &MatchInfo) {
6323 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6324 // if type(x) == type(G_TRUNC)
6325 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6326 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6327 return false;
6328
6329 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6330}
6331
6332bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
6333 Register &MatchInfo) {
6334 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6335 // y if K == size of vector element type
6336 std::optional<ValueAndVReg> ShiftAmt;
6337 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6338 m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
6339 m_GCst(ShiftAmt))))
6340 return false;
6341
6342 LLT MatchTy = MRI.getType(MatchInfo);
6343 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6344 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6345}
6346
6347unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6348 CmpInst::Predicate Pred, LLT DstTy,
6349 SelectPatternNaNBehaviour VsNaNRetVal) const {
6350 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6351 "Expected a NaN behaviour?");
6352 // Choose an opcode based off of legality or the behaviour when one of the
6353 // LHS/RHS may be NaN.
6354 switch (Pred) {
6355 default:
6356 return 0;
6357 case CmpInst::FCMP_UGT:
6358 case CmpInst::FCMP_UGE:
6359 case CmpInst::FCMP_OGT:
6360 case CmpInst::FCMP_OGE:
6361 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6362 return TargetOpcode::G_FMAXNUM;
6363 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6364 return TargetOpcode::G_FMAXIMUM;
6365 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6366 return TargetOpcode::G_FMAXNUM;
6367 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6368 return TargetOpcode::G_FMAXIMUM;
6369 return 0;
6370 case CmpInst::FCMP_ULT:
6371 case CmpInst::FCMP_ULE:
6372 case CmpInst::FCMP_OLT:
6373 case CmpInst::FCMP_OLE:
6374 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6375 return TargetOpcode::G_FMINNUM;
6376 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6377 return TargetOpcode::G_FMINIMUM;
6378 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6379 return TargetOpcode::G_FMINNUM;
6380 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6381 return 0;
6382 return TargetOpcode::G_FMINIMUM;
6383 }
6384}
6385
6386CombinerHelper::SelectPatternNaNBehaviour
6387CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6388 bool IsOrderedComparison) const {
6389 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6390 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6391 // Completely unsafe.
6392 if (!LHSSafe && !RHSSafe)
6393 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6394 if (LHSSafe && RHSSafe)
6395 return SelectPatternNaNBehaviour::RETURNS_ANY;
6396 // An ordered comparison will return false when given a NaN, so it
6397 // returns the RHS.
6398 if (IsOrderedComparison)
6399 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6400 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6401 // An unordered comparison will return true when given a NaN, so it
6402 // returns the LHS.
6403 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6404 : SelectPatternNaNBehaviour::RETURNS_NAN;
6405}
6406
6407bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6408 Register TrueVal, Register FalseVal,
6409 BuildFnTy &MatchInfo) {
6410 // Match: select (fcmp cond x, y) x, y
6411 // select (fcmp cond x, y) y, x
6412 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
6413 LLT DstTy = MRI.getType(Dst);
6414 // Bail out early on pointers, since we'll never want to fold to a min/max.
6415 if (DstTy.isPointer())
6416 return false;
6417 // Match a floating point compare with a less-than/greater-than predicate.
6418 // TODO: Allow multiple users of the compare if they are all selects.
6419 CmpInst::Predicate Pred;
6420 Register CmpLHS, CmpRHS;
6421 if (!mi_match(Cond, MRI,
6422 m_OneNonDBGUse(
6423 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6424 CmpInst::isEquality(Pred))
6425 return false;
6426 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6427 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6428 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6429 return false;
6430 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6431 std::swap(CmpLHS, CmpRHS);
6432 Pred = CmpInst::getSwappedPredicate(Pred);
6433 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6434 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6435 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6436 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6437 }
6438 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6439 return false;
6440 // Decide what type of max/min this should be based off of the predicate.
6441 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6442 if (!Opc || !isLegal({Opc, {DstTy}}))
6443 return false;
6444 // Comparisons between signed zero and zero may have different results...
6445 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6446 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6447 // We don't know if a comparison between two 0s will give us a consistent
6448 // result. Be conservative and only proceed if at least one side is
6449 // non-zero.
6450 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6451 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6452 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6453 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6454 return false;
6455 }
6456 }
6457 MatchInfo = [=](MachineIRBuilder &B) {
6458 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6459 };
6460 return true;
6461}
6462
6463bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
6464 BuildFnTy &MatchInfo) {
6465 // TODO: Handle integer cases.
6466 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6467 // Condition may be fed by a truncated compare.
6468 Register Cond = MI.getOperand(1).getReg();
6469 Register MaybeTrunc;
6470 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6471 Cond = MaybeTrunc;
6472 Register Dst = MI.getOperand(0).getReg();
6473 Register TrueVal = MI.getOperand(2).getReg();
6474 Register FalseVal = MI.getOperand(3).getReg();
6475 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6476}
6477
6478bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
6479 BuildFnTy &MatchInfo) {
6480 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6481 // (X + Y) == X --> Y == 0
6482 // (X + Y) != X --> Y != 0
6483 // (X - Y) == X --> Y == 0
6484 // (X - Y) != X --> Y != 0
6485 // (X ^ Y) == X --> Y == 0
6486 // (X ^ Y) != X --> Y != 0
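// These hold because G_ADD, G_SUB and G_XOR are cancellative:
// op(X, Y) == X exactly when Y is the identity element 0.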
6487 Register Dst = MI.getOperand(0).getReg();
6488 CmpInst::Predicate Pred;
6489 Register X, Y, OpLHS, OpRHS;
6490 bool MatchedSub = mi_match(
6491 Dst, MRI,
6492 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6493 if (MatchedSub && X != OpLHS)
6494 return false;
6495 if (!MatchedSub) {
6496 if (!mi_match(Dst, MRI,
6497 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6498 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6499 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6500 return false;
6501 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6502 }
6503 MatchInfo = [=](MachineIRBuilder &B) {
6504 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6505 B.buildICmp(Pred, Dst, Y, Zero);
6506 };
6507 return CmpInst::isEquality(Pred) && Y.isValid();
6508}
6509
6510bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
6511 Register ShiftReg = MI.getOperand(2).getReg();
6512 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
6513 auto IsShiftTooBig = [&](const Constant *C) {
6514 auto *CI = dyn_cast<ConstantInt>(C);
6515 return CI && CI->uge(ResTy.getScalarSizeInBits());
6516 };
6517 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
6518}
6519
6520bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
6521 unsigned LHSOpndIdx = 1;
6522 unsigned RHSOpndIdx = 2;
6523 switch (MI.getOpcode()) {
6524 case TargetOpcode::G_UADDO:
6525 case TargetOpcode::G_SADDO:
6526 case TargetOpcode::G_UMULO:
6527 case TargetOpcode::G_SMULO:
6528 LHSOpndIdx = 2;
6529 RHSOpndIdx = 3;
6530 break;
6531 default:
6532 break;
6533 }
6534 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
6535 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
6536 if (!getIConstantVRegVal(LHS, MRI)) {
6537 // Skip commuting if LHS is not a constant. But, LHS may be a
6538 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
6539 // have a constant on the RHS.
6540 if (MRI.getVRegDef(LHS)->getOpcode() !=
6541 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
6542 return false;
6543 }
6544 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
6545 return MRI.getVRegDef(RHS)->getOpcode() !=
6546 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
6547 !getIConstantVRegVal(RHS, MRI);
6548}
6549
6550bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
6551 Register LHS = MI.getOperand(1).getReg();
6552 Register RHS = MI.getOperand(2).getReg();
6553 std::optional<FPValueAndVReg> ValAndVReg;
6554 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
6555 return false;
6556 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
6557}
6558
6559void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
6560 Observer.changingInstr(MI);
6561 unsigned LHSOpndIdx = 1;
6562 unsigned RHSOpndIdx = 2;
6563 switch (MI.getOpcode()) {
6564 case TargetOpcode::G_UADDO:
6565 case TargetOpcode::G_SADDO:
6566 case TargetOpcode::G_UMULO:
6567 case TargetOpcode::G_SMULO:
6568 LHSOpndIdx = 2;
6569 RHSOpndIdx = 3;
6570 break;
6571 default:
6572 break;
6573 }
6574 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
6575 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
6576 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
6577 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
6578 Observer.changedInstr(MI);
6579}
6580
6581bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
6582 LLT SrcTy = MRI.getType(Src);
6583 if (SrcTy.isFixedVector())
6584 return isConstantSplatVector(Src, 1, AllowUndefs);
6585 if (SrcTy.isScalar()) {
6586 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6587 return true;
6588 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6589 return IConstant && IConstant->Value == 1;
6590 }
6591 return false; // scalable vector
6592}
6593
6594bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
6595 LLT SrcTy = MRI.getType(Src);
6596 if (SrcTy.isFixedVector())
6597 return isConstantSplatVector(Src, 0, AllowUndefs);
6598 if (SrcTy.isScalar()) {
6599 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6600 return true;
6601 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6602 return IConstant && IConstant->Value == 0;
6603 }
6604 return false; // scalable vector
6605}
6606
6607// Ignores COPYs during conformance checks.
6608// FIXME scalable vectors.
6609bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
6610 bool AllowUndefs) {
6611 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6612 if (!BuildVector)
6613 return false;
6614 unsigned NumSources = BuildVector->getNumSources();
6615
6616 for (unsigned I = 0; I < NumSources; ++I) {
6617 GImplicitDef *ImplicitDef =
6618 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
6619 if (ImplicitDef && AllowUndefs)
6620 continue;
6621 if (ImplicitDef && !AllowUndefs)
6622 return false;
6623 std::optional<ValueAndVReg> IConstant =
6624 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6625 if (IConstant && IConstant->Value == SplatValue)
6626 continue;
6627 return false;
6628 }
6629 return true;
6630}
6631
6632// Ignores COPYs during lookups.
6633// FIXME scalable vectors
6634std::optional<APInt>
6635CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
6636 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6637 if (IConstant)
6638 return IConstant->Value;
6639
6640 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6641 if (!BuildVector)
6642 return std::nullopt;
6643 unsigned NumSources = BuildVector->getNumSources();
6644
6645 std::optional<APInt> Value = std::nullopt;
6646 for (unsigned I = 0; I < NumSources; ++I) {
6647 std::optional<ValueAndVReg> IConstant =
6648 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6649 if (!IConstant)
6650 return std::nullopt;
6651 if (!Value)
6652 Value = IConstant->Value;
6653 else if (*Value != IConstant->Value)
6654 return std::nullopt;
6655 }
6656 return Value;
6657}
6658
6659// FIXME G_SPLAT_VECTOR
6660bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
6661 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6662 if (IConstant)
6663 return true;
6664
6665 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6666 if (!BuildVector)
6667 return false;
6668
6669 unsigned NumSources = BuildVector->getNumSources();
6670 for (unsigned I = 0; I < NumSources; ++I) {
6671 std::optional<ValueAndVReg> IConstant =
6672 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6673 if (!IConstant)
6674 return false;
6675 }
6676 return true;
6677}
6678
6679// TODO: use knownbits to determine zeros
6680bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
6681 BuildFnTy &MatchInfo) {
6682 uint32_t Flags = Select->getFlags();
6683 Register Dest = Select->getReg(0);
6684 Register Cond = Select->getCondReg();
6685 Register True = Select->getTrueReg();
6686 Register False = Select->getFalseReg();
6687 LLT CondTy = MRI.getType(Select->getCondReg());
6688 LLT TrueTy = MRI.getType(Select->getTrueReg());
6689
6690 // We only do this combine for scalar boolean conditions.
6691 if (CondTy != LLT::scalar(1))
6692 return false;
6693
6694 if (TrueTy.isPointer())
6695 return false;
6696
6697 // Both are scalars.
6698 std::optional<ValueAndVReg> TrueOpt =
6699 getIConstantVRegValWithLookThrough(True, MRI);
6700 std::optional<ValueAndVReg> FalseOpt =
6701 getIConstantVRegValWithLookThrough(False, MRI);
6702
6703 if (!TrueOpt || !FalseOpt)
6704 return false;
6705
6706 APInt TrueValue = TrueOpt->Value;
6707 APInt FalseValue = FalseOpt->Value;
6708
6709 // select Cond, 1, 0 --> zext (Cond)
6710 if (TrueValue.isOne() && FalseValue.isZero()) {
6711 MatchInfo = [=](MachineIRBuilder &B) {
6712 B.setInstrAndDebugLoc(*Select);
6713 B.buildZExtOrTrunc(Dest, Cond);
6714 };
6715 return true;
6716 }
6717
6718 // select Cond, -1, 0 --> sext (Cond)
6719 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
6720 MatchInfo = [=](MachineIRBuilder &B) {
6721 B.setInstrAndDebugLoc(*Select);
6722 B.buildSExtOrTrunc(Dest, Cond);
6723 };
6724 return true;
6725 }
6726
6727 // select Cond, 0, 1 --> zext (!Cond)
6728 if (TrueValue.isZero() && FalseValue.isOne()) {
6729 MatchInfo = [=](MachineIRBuilder &B) {
6730 B.setInstrAndDebugLoc(*Select);
6731 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6732 B.buildNot(Inner, Cond);
6733 B.buildZExtOrTrunc(Dest, Inner);
6734 };
6735 return true;
6736 }
6737
6738 // select Cond, 0, -1 --> sext (!Cond)
6739 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
6740 MatchInfo = [=](MachineIRBuilder &B) {
6741 B.setInstrAndDebugLoc(*Select);
6742 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6743 B.buildNot(Inner, Cond);
6744 B.buildSExtOrTrunc(Dest, Inner);
6745 };
6746 return true;
6747 }
6748
6749 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6750 if (TrueValue - 1 == FalseValue) {
6751 MatchInfo = [=](MachineIRBuilder &B) {
6752 B.setInstrAndDebugLoc(*Select);
6753 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6754 B.buildZExtOrTrunc(Inner, Cond);
6755 B.buildAdd(Dest, Inner, False);
6756 };
6757 return true;
6758 }
6759
6760 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6761 if (TrueValue + 1 == FalseValue) {
6762 MatchInfo = [=](MachineIRBuilder &B) {
6763 B.setInstrAndDebugLoc(*Select);
6764 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6765 B.buildSExtOrTrunc(Inner, Cond);
6766 B.buildAdd(Dest, Inner, False);
6767 };
6768 return true;
6769 }
6770
6771 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
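// E.g. select Cond, 8, 0 --> (zext Cond) << 3.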
6772 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
6773 MatchInfo = [=](MachineIRBuilder &B) {
6774 B.setInstrAndDebugLoc(*Select);
6775 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6776 B.buildZExtOrTrunc(Inner, Cond);
6777 // The shift amount must be scalar.
6778 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
6779 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
6780 B.buildShl(Dest, Inner, ShAmtC, Flags);
6781 };
6782 return true;
6783 }
6784 // select Cond, -1, C --> or (sext Cond), C
6785 if (TrueValue.isAllOnes()) {
6786 MatchInfo = [=](MachineIRBuilder &B) {
6787 B.setInstrAndDebugLoc(*Select);
6788 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6789 B.buildSExtOrTrunc(Inner, Cond);
6790 B.buildOr(Dest, Inner, False, Flags);
6791 };
6792 return true;
6793 }
6794
6795 // select Cond, C, -1 --> or (sext (not Cond)), C
6796 if (FalseValue.isAllOnes()) {
6797 MatchInfo = [=](MachineIRBuilder &B) {
6798 B.setInstrAndDebugLoc(*Select);
6799 Register Not = MRI.createGenericVirtualRegister(TrueTy);
6800 B.buildNot(Not, Cond);
6801 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6802 B.buildSExtOrTrunc(Inner, Not);
6803 B.buildOr(Dest, Inner, True, Flags);
6804 };
6805 return true;
6806 }
6807
6808 return false;
6809}
6810
6811// TODO: use knownbits to determine zeros
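// Fold selects whose condition and operands are booleans (or fixed vectors of
// booleans) into and/or logic; the non-constant operand is frozen so the new
// logic op does not propagate poison.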
6812bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
6813 BuildFnTy &MatchInfo) {
6814 uint32_t Flags = Select->getFlags();
6815 Register DstReg = Select->getReg(0);
6816 Register Cond = Select->getCondReg();
6817 Register True = Select->getTrueReg();
6818 Register False = Select->getFalseReg();
6819 LLT CondTy = MRI.getType(Select->getCondReg());
6820 LLT TrueTy = MRI.getType(Select->getTrueReg());
6821
6822 // Boolean or fixed vector of booleans.
6823 if (CondTy.isScalableVector() ||
6824 (CondTy.isFixedVector() &&
6825 CondTy.getElementType().getScalarSizeInBits() != 1) ||
6826 CondTy.getScalarSizeInBits() != 1)
6827 return false;
6828
6829 if (CondTy != TrueTy)
6830 return false;
6831
6832 // select Cond, Cond, F --> or Cond, F
6833 // select Cond, 1, F --> or Cond, F
6834 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
6835 MatchInfo = [=](MachineIRBuilder &B) {
6836 B.setInstrAndDebugLoc(*Select);
6837 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6838 B.buildZExtOrTrunc(Ext, Cond);
6839 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6840 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
6841 };
6842 return true;
6843 }
6844
6845 // select Cond, T, Cond --> and Cond, T
6846 // select Cond, T, 0 --> and Cond, T
6847 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
6848 MatchInfo = [=](MachineIRBuilder &B) {
6849 B.setInstrAndDebugLoc(*Select);
6850 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6851 B.buildZExtOrTrunc(Ext, Cond);
6852 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6853 B.buildAnd(DstReg, Ext, FreezeTrue);
6854 };
6855 return true;
6856 }
6857
6858 // select Cond, T, 1 --> or (not Cond), T
6859 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
6860 MatchInfo = [=](MachineIRBuilder &B) {
6861 B.setInstrAndDebugLoc(*Select);
6862 // First the not.
6863 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6864 B.buildNot(Inner, Cond);
6865 // Then an ext to match the destination register.
6866 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6867 B.buildZExtOrTrunc(Ext, Inner);
6868 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6869 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
6870 };
6871 return true;
6872 }
6873
6874 // select Cond, 0, F --> and (not Cond), F
6875 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
6876 MatchInfo = [=](MachineIRBuilder &B) {
6877 B.setInstrAndDebugLoc(*Select);
6878 // First the not.
6879 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6880 B.buildNot(Inner, Cond);
6881 // Then an ext to match the destination register.
6882 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6883 B.buildZExtOrTrunc(Ext, Inner);
6884 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6885 B.buildAnd(DstReg, Ext, FreezeFalse);
6886 };
6887 return true;
6888 }
6889
6890 return false;
6891}
6892
6893bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
6894 BuildFnTy &MatchInfo) {
6895 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
6896 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
6897
6898 Register DstReg = Select->getReg(0);
6899 Register True = Select->getTrueReg();
6900 Register False = Select->getFalseReg();
6901 LLT DstTy = MRI.getType(DstReg);
6902
6903 if (DstTy.isPointer())
6904 return false;
6905
6906 // We want to fold the icmp and replace the select.
6907 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
6908 return false;
6909
6910 CmpInst::Predicate Pred = Cmp->getCond();
6911 // We need a strict or non-strict ordering (greater/less) predicate for
6912 // canonicalization; equality predicates cannot form a min/max.
6913 if (CmpInst::isEquality(Pred))
6914 return false;
6915
6916 Register CmpLHS = Cmp->getLHSReg();
6917 Register CmpRHS = Cmp->getRHSReg();
6918
6919 // We can swap CmpLHS and CmpRHS for a higher hit rate.
6920 if (True == CmpRHS && False == CmpLHS) {
6921 std::swap(CmpLHS, CmpRHS);
6922 Pred = CmpInst::getSwappedPredicate(Pred);
6923 }
6924
6925 // (icmp X, Y) ? X : Y -> integer minmax.
6926 // see matchSelectPattern in ValueTracking.
6927 // Legality between G_SELECT and integer minmax can differ.
6928 if (True != CmpLHS || False != CmpRHS)
6929 return false;
6930
6931 switch (Pred) {
6932 case ICmpInst::ICMP_UGT:
6933 case ICmpInst::ICMP_UGE: {
6934 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
6935 return false;
6936 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
6937 return true;
6938 }
6939 case ICmpInst::ICMP_SGT:
6940 case ICmpInst::ICMP_SGE: {
6941 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
6942 return false;
6943 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
6944 return true;
6945 }
6946 case ICmpInst::ICMP_ULT:
6947 case ICmpInst::ICMP_ULE: {
6948 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
6949 return false;
6950 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
6951 return true;
6952 }
6953 case ICmpInst::ICMP_SLT:
6954 case ICmpInst::ICMP_SLE: {
6955 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
6956 return false;
6957 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
6958 return true;
6959 }
6960 default:
6961 return false;
6962 }
6963}
6964
6965bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
6966 GSelect *Select = cast<GSelect>(&MI);
6967
6968 if (tryFoldSelectOfConstants(Select, MatchInfo))
6969 return true;
6970
6971 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
6972 return true;
6973
6974 return false;
6975}
6976
6977/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
6978/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
6979/// into a single comparison using range-based reasoning.
6980/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
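/// E.g. (icmp eq X, 0) || (icmp eq X, 1) becomes (icmp ult X, 2), and
/// (icmp eq X, 3) || (icmp eq X, 11) becomes (icmp eq (and X, ~8), 3) via the
/// mask path below (illustrative examples).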
6981bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
6982 BuildFnTy &MatchInfo) {
6983 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6984 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6985 Register DstReg = Logic->getReg(0);
6986 Register LHS = Logic->getLHSReg();
6987 Register RHS = Logic->getRHSReg();
6988 unsigned Flags = Logic->getFlags();
6989
6990 // We need a G_ICMP on the LHS register.
6991 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
6992 if (!Cmp1)
6993 return false;
6994
6995 // We need a G_ICMP on the RHS register.
6996 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
6997 if (!Cmp2)
6998 return false;
6999
7000 // We want to fold the icmps.
7001 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7002 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7003 return false;
7004
7005 APInt C1;
7006 APInt C2;
7007 std::optional<ValueAndVReg> MaybeC1 =
7008 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
7009 if (!MaybeC1)
7010 return false;
7011 C1 = MaybeC1->Value;
7012
7013 std::optional<ValueAndVReg> MaybeC2 =
7014 getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
7015 if (!MaybeC2)
7016 return false;
7017 C2 = MaybeC2->Value;
7018
7019 Register R1 = Cmp1->getLHSReg();
7020 Register R2 = Cmp2->getLHSReg();
7021 CmpInst::Predicate Pred1 = Cmp1->getCond();
7022 CmpInst::Predicate Pred2 = Cmp2->getCond();
7023 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7024 LLT CmpOperandTy = MRI.getType(R1);
7025
7026 if (CmpOperandTy.isPointer())
7027 return false;
7028
7029 // We build ands, adds, and constants of type CmpOperandTy.
7030 // They must be legal to build.
7031 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7032 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7033 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7034 return false;
7035
7036 // Look through an add of a constant offset on R1, R2, or both operands.
7037 // This lets us turn the R + C' < C'' range idiom into a proper range.
7038 std::optional<APInt> Offset1;
7039 std::optional<APInt> Offset2;
7040 if (R1 != R2) {
7041 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7042 std::optional<ValueAndVReg> MaybeOffset1 =
7043 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
7044 if (MaybeOffset1) {
7045 R1 = Add->getLHSReg();
7046 Offset1 = MaybeOffset1->Value;
7047 }
7048 }
7049 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7050 std::optional<ValueAndVReg> MaybeOffset2 =
7051 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
7052 if (MaybeOffset2) {
7053 R2 = Add->getLHSReg();
7054 Offset2 = MaybeOffset2->Value;
7055 }
7056 }
7057 }
7058
7059 if (R1 != R2)
7060 return false;
7061
7062 // We calculate the icmp ranges including maybe offsets.
7063 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7064 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7065 if (Offset1)
7066 CR1 = CR1.subtract(*Offset1);
7067
7068 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7069 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7070 if (Offset2)
7071 CR2 = CR2.subtract(*Offset2);
7072
7073 bool CreateMask = false;
7074 APInt LowerDiff;
7075 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7076 if (!CR) {
7077 // We need non-wrapping ranges.
7078 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7079 return false;
7080
7081 // Check whether we have equal-size ranges that only differ by one bit.
7082 // In that case we can apply a mask to map one range onto the other.
7083 LowerDiff = CR1.getLower() ^ CR2.getLower();
7084 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7085 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7086 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7087 CR1Size != CR2.getUpper() - CR2.getLower())
7088 return false;
7089
7090 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7091 CreateMask = true;
7092 }
7093
7094 if (IsAnd)
7095 CR = CR->inverse();
7096
7097 CmpInst::Predicate NewPred;
7098 APInt NewC, Offset;
7099 CR->getEquivalentICmp(NewPred, NewC, Offset);
7100
7101 // We take the result type of one of the original icmps, CmpTy, for
7102 // the icmp to be built. The operand type, CmpOperandTy, is used for
7103 // the other instructions and constants to be built. The parameter and
7104 // result types are the same for G_ADD and G_AND. CmpTy and the type
7105 // of DstReg might differ, which is why we zext or trunc the icmp into
7106 // the destination register.
7107
7108 MatchInfo = [=](MachineIRBuilder &B) {
7109 if (CreateMask && Offset != 0) {
7110 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7111 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7112 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7113 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7114 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7115 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7116 B.buildZExtOrTrunc(DstReg, ICmp);
7117 } else if (CreateMask && Offset == 0) {
7118 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7119 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7120 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7121 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7122 B.buildZExtOrTrunc(DstReg, ICmp);
7123 } else if (!CreateMask && Offset != 0) {
7124 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7125 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7126 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7127 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7128 B.buildZExtOrTrunc(DstReg, ICmp);
7129 } else if (!CreateMask && Offset == 0) {
7130 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7131 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7132 B.buildZExtOrTrunc(DstReg, ICmp);
7133 } else {
7134 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7135 }
7136 };
7137 return true;
7138}
7139
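// Fold (fcmp Pred1 X, Y) && (fcmp Pred2 X, Y) or
// (fcmp Pred1 X, Y) || (fcmp Pred2 X, Y) into a single fcmp whose predicate
// is the and/or of the two predicates' condition-code bits.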
7140bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7141 BuildFnTy &MatchInfo) {
7142 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7143 Register DestReg = Logic->getReg(0);
7144 Register LHS = Logic->getLHSReg();
7145 Register RHS = Logic->getRHSReg();
7146 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7147
7148 // We need a compare on the LHS register.
7149 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7150 if (!Cmp1)
7151 return false;
7152
7153 // We need a compare on the RHS register.
7154 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7155 if (!Cmp2)
7156 return false;
7157
7158 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7159 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7160
7161 // We build one fcmp; to fold both fcmps and replace the logic op, each
7162 // must have a single use and the fcmps must have the same shape.
7163 if (!isLegalOrBeforeLegalizer(
7164 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7165 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7166 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7167 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7168 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7169 return false;
7170
7171 CmpInst::Predicate PredL = Cmp1->getCond();
7172 CmpInst::Predicate PredR = Cmp2->getCond();
7173 Register LHS0 = Cmp1->getLHSReg();
7174 Register LHS1 = Cmp1->getRHSReg();
7175 Register RHS0 = Cmp2->getLHSReg();
7176 Register RHS1 = Cmp2->getRHSReg();
7177
7178 if (LHS0 == RHS1 && LHS1 == RHS0) {
7179 // Swap RHS operands to match LHS.
7180 PredR = CmpInst::getSwappedPredicate(PredR);
7181 std::swap(RHS0, RHS1);
7182 }
7183
7184 if (LHS0 == RHS0 && LHS1 == RHS1) {
7185 // We determine the new predicate.
7186 unsigned CmpCodeL = getFCmpCode(PredL);
7187 unsigned CmpCodeR = getFCmpCode(PredR);
7188 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7189 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7190 MatchInfo = [=](MachineIRBuilder &B) {
7191 // The fcmp predicates fill the lower part of the enum.
7192 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7193 if (Pred == FCmpInst::FCMP_FALSE &&
7194 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7195 auto False = B.buildConstant(CmpTy, 0);
7196 B.buildZExtOrTrunc(DestReg, False);
7197 } else if (Pred == FCmpInst::FCMP_TRUE &&
7198 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7199 auto True =
7200 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7201 CmpTy.isVector() /*isVector*/,
7202 true /*isFP*/));
7203 B.buildZExtOrTrunc(DestReg, True);
7204 } else { // We take the predicate without predicate optimizations.
7205 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7206 B.buildZExtOrTrunc(DestReg, Cmp);
7207 }
7208 };
7209 return true;
7210 }
7211
7212 return false;
7213}
7214
7215bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
7216 GAnd *And = cast<GAnd>(&MI);
7217
7218 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7219 return true;
7220
7221 if (tryFoldLogicOfFCmps(And, MatchInfo))
7222 return true;
7223
7224 return false;
7225}
7226
7227bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
7228 GOr *Or = cast<GOr>(&MI);
7229
7230 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7231 return true;
7232
7233 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7234 return true;
7235
7236 return false;
7237}
7238
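// Combine G_UADDO/G_SADDO: drop dead carries, constant-fold, canonicalize a
// constant onto the RHS, merge with a nuw/nsw inner add of a constant, and
// use known bits/constant ranges to prove whether the carry can be set.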
7239bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
7240 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7241
7242 // Addo has no flags
7243 Register Dst = Add->getReg(0);
7244 Register Carry = Add->getReg(1);
7245 Register LHS = Add->getLHSReg();
7246 Register RHS = Add->getRHSReg();
7247 bool IsSigned = Add->isSigned();
7248 LLT DstTy = MRI.getType(Dst);
7249 LLT CarryTy = MRI.getType(Carry);
7250
7251 // Fold addo, if the carry is dead -> add, undef.
7252 if (MRI.use_nodbg_empty(Carry) &&
7253 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7254 MatchInfo = [=](MachineIRBuilder &B) {
7255 B.buildAdd(Dst, LHS, RHS);
7256 B.buildUndef(Carry);
7257 };
7258 return true;
7259 }
7260
7261 // Canonicalize constant to RHS.
7262 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7263 if (IsSigned) {
7264 MatchInfo = [=](MachineIRBuilder &B) {
7265 B.buildSAddo(Dst, Carry, RHS, LHS);
7266 };
7267 return true;
7268 }
7269 // !IsSigned
7270 MatchInfo = [=](MachineIRBuilder &B) {
7271 B.buildUAddo(Dst, Carry, RHS, LHS);
7272 };
7273 return true;
7274 }
7275
7276 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7277 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7278
7279 // Fold addo(c1, c2) -> c3, carry.
7280 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7281 isConstantLegalOrBeforeLegalizer(CarryTy)) {
7282 bool Overflow;
7283 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7284 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7285 MatchInfo = [=](MachineIRBuilder &B) {
7286 B.buildConstant(Dst, Result);
7287 B.buildConstant(Carry, Overflow);
7288 };
7289 return true;
7290 }
7291
7292 // Fold (addo x, 0) -> x, no carry
7293 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7294 MatchInfo = [=](MachineIRBuilder &B) {
7295 B.buildCopy(Dst, LHS);
7296 B.buildConstant(Carry, 0);
7297 };
7298 return true;
7299 }
7300
7301 // Given 2 constant operands whose sum does not overflow:
7302 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7303 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7304 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7305 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7306 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7307 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7308 std::optional<APInt> MaybeAddRHS =
7309 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7310 if (MaybeAddRHS) {
7311 bool Overflow;
7312 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7313 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7314 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7315 if (IsSigned) {
7316 MatchInfo = [=](MachineIRBuilder &B) {
7317 auto ConstRHS = B.buildConstant(DstTy, NewC);
7318 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7319 };
7320 return true;
7321 }
7322 // !IsSigned
7323 MatchInfo = [=](MachineIRBuilder &B) {
7324 auto ConstRHS = B.buildConstant(DstTy, NewC);
7325 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7326 };
7327 return true;
7328 }
7329 }
7330 };
7331
7332 // We try to combine addo to non-overflowing add.
7333 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7334 !isConstantLegalOrBeforeLegalizer(CarryTy))
7335 return false;
7336
7337 // We try to combine uaddo to non-overflowing add.
7338 if (!IsSigned) {
7339 ConstantRange CRLHS =
7340 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
7341 ConstantRange CRRHS =
7342 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
7343
7344 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7345 case ConstantRange::OverflowResult::MayOverflow:
7346 return false;
7347 case ConstantRange::OverflowResult::NeverOverflows: {
7348 MatchInfo = [=](MachineIRBuilder &B) {
7349 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7350 B.buildConstant(Carry, 0);
7351 };
7352 return true;
7353 }
7354 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7355 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7356 MatchInfo = [=](MachineIRBuilder &B) {
7357 B.buildAdd(Dst, LHS, RHS);
7358 B.buildConstant(Carry, 1);
7359 };
7360 return true;
7361 }
7362 }
7363 return false;
7364 }
7365
7366 // We try to combine saddo to non-overflowing add.
7367
7368 // If LHS and RHS each have at least two sign bits, then there is no signed
7369 // overflow.
7370 if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
7371 MatchInfo = [=](MachineIRBuilder &B) {
7372 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7373 B.buildConstant(Carry, 0);
7374 };
7375 return true;
7376 }
7377
7378 ConstantRange CRLHS =
7379 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
7380 ConstantRange CRRHS =
7381 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
7382
7383 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7384 case ConstantRange::OverflowResult::MayOverflow:
7385 return false;
7386 case ConstantRange::OverflowResult::NeverOverflows: {
7387 MatchInfo = [=](MachineIRBuilder &B) {
7388 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7389 B.buildConstant(Carry, 0);
7390 };
7391 return true;
7392 }
7393 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7394 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7395 MatchInfo = [=](MachineIRBuilder &B) {
7396 B.buildAdd(Dst, LHS, RHS);
7397 B.buildConstant(Carry, 1);
7398 };
7399 return true;
7400 }
7401 }
7402
7403 return false;
7404}
7405
7406void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
7407 BuildFnTy &MatchInfo) {
7408 MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
7409 MatchInfo(Builder);
7410 Root->eraseFromParent();
7411}
7412
7413bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) {
7414 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
7415 return getTargetLowering().isBeneficialToExpandPowI(Exponent, OptForSize);
7416}
7417
7418void CombinerHelper::applyExpandFPowI(MachineInstr &MI, int64_t Exponent) {
7419 auto [Dst, Base] = MI.getFirst2Regs();
7420 LLT Ty = MRI.getType(Dst);
7421 int64_t ExpVal = Exponent;
7422
7423 if (ExpVal == 0) {
7424 Builder.buildFConstant(Dst, 1.0);
7425 MI.removeFromParent();
7426 return;
7427 }
7428
7429 if (ExpVal < 0)
7430 ExpVal = -ExpVal;
7431
7432 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
7433 // to generate the multiply sequence. There are more optimal ways to do this
7434 // (for example, powi(x,15) generates one more multiply than it should), but
7435 // this has the benefit of being both really simple and much better than a
7436 // libcall.
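// E.g. for Exponent = 5 (0b101) this builds x*x and (x*x)*(x*x), then
// multiplies x by x^4 to form x^5.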
7437 std::optional<SrcOp> Res;
7438 SrcOp CurSquare = Base;
7439 while (ExpVal > 0) {
7440 if (ExpVal & 1) {
7441 if (!Res)
7442 Res = CurSquare;
7443 else
7444 Res = Builder.buildFMul(Ty, *Res, CurSquare);
7445 }
7446
7447 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
7448 ExpVal >>= 1;
7449 }
7450
7451 // If the original exponent was negative, invert the result, producing
7452 // 1/(x*x*x).
7453 if (Exponent < 0)
7454 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
7455 MI.getFlags());
7456
7457 Builder.buildCopy(Dst, *Res);
7458 MI.eraseFromParent();
7459}
7460
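// Combine sext(trunc x): if the types match, emit a copy; if the destination
// is narrower, emit a trunc with the nsw flag; if it is wider, emit a sext.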
7461bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO,
7462 BuildFnTy &MatchInfo) {
7463 GSext *Sext = cast<GSext>(getDefIgnoringCopies(MO.getReg(), MRI));
7464 GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Sext->getSrcReg(), MRI));
7465
7466 Register Dst = Sext->getReg(0);
7467 Register Src = Trunc->getSrcReg();
7468
7469 LLT DstTy = MRI.getType(Dst);
7470 LLT SrcTy = MRI.getType(Src);
7471
7472 if (DstTy == SrcTy) {
7473 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
7474 return true;
7475 }
7476
7477 if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
7478 isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
7479 MatchInfo = [=](MachineIRBuilder &B) {
7480 B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoSWrap);
7481 };
7482 return true;
7483 }
7484
7485 if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
7486 isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT, {DstTy, SrcTy}})) {
7487 MatchInfo = [=](MachineIRBuilder &B) { B.buildSExt(Dst, Src); };
7488 return true;
7489 }
7490
7491 return false;
7492}
7493
7494bool CombinerHelper::matchZextOfTrunc(const MachineOperand &MO,
7495 BuildFnTy &MatchInfo) {
7496 GZext *Zext = cast<GZext>(getDefIgnoringCopies(MO.getReg(), MRI));
7497 GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Zext->getSrcReg(), MRI));
7498
7499 Register Dst = Zext->getReg(0);
7500 Register Src = Trunc->getSrcReg();
7501
7502 LLT DstTy = MRI.getType(Dst);
7503 LLT SrcTy = MRI.getType(Src);
7504
7505 if (DstTy == SrcTy) {
7506 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
7507 return true;
7508 }
7509
7510 if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
7511 isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
7512 MatchInfo = [=](MachineIRBuilder &B) {
7513 B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoUWrap);
7514 };
7515 return true;
7516 }
7517
7518 if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
7519 isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}})) {
7520 MatchInfo = [=](MachineIRBuilder &B) {
7521 B.buildZExt(Dst, Src, MachineInstr::MIFlag::NonNeg);
7522 };
7523 return true;
7524 }
7525
7526 return false;
7527}
7528
7529bool CombinerHelper::matchNonNegZext(const MachineOperand &MO,
7530 BuildFnTy &MatchInfo) {
7531 GZext *Zext = cast<GZext>(MRI.getVRegDef(MO.getReg()));
7532
7533 Register Dst = Zext->getReg(0);
7534 Register Src = Zext->getSrcReg();
7535
7536 LLT DstTy = MRI.getType(Dst);
7537 LLT SrcTy = MRI.getType(Src);
7538 const auto &TLI = getTargetLowering();
7539
7540 // Convert zext nneg to sext if sext is the preferred form for the target.
7541 if (isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT, {DstTy, SrcTy}}) &&
7542 TLI.isSExtCheaperThanZExt(getMVTForLLT(SrcTy), getMVTForLLT(DstTy))) {
7543 MatchInfo = [=](MachineIRBuilder &B) { B.buildSExt(Dst, Src); };
7544 return true;
7545 }
7546
7547 return false;
7548}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
static const LLT S1
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition: APFloat.h:1362
bool isNaN() const
Definition: APFloat.h:1352
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1146
APInt bitcastToAPInt() const
Definition: APFloat.h:1260
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:351
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1162
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1448
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1091
int32_t exactLogBase2() const
Definition: APInt.h:1741
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:814
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1598
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1557
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
unsigned countl_one() const
Count the number of leading one bits.
Definition: APInt.h:1574
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMask(unsigned numBits) const
Definition: APInt.h:468
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:180
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:369
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1522
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:838
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1615
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1201
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition: InstrTypes.h:997
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:774
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:787
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:772
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:761
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:769
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:764
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:785
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:783
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:770
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:759
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:909
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
static bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyUDivByConst(MachineInstr &MI)
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops)
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
bool matchPtrAddZero(MachineInstr &MI)
}
bool matchAllExplicitUsesAreUndef(MachineInstr &MI)
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx)
Delete MI and replace all of its uses with its OpIdx-th operand.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUDivByConst(MachineInstr &MI)
Combine G_UDIV by constant into a multiply by magic constant.
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI)
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchShiftsTooBig(MachineInstr &MI)
Match shifts greater or equal to the bitwidth of the operation.
bool tryCombineCopy(MachineInstr &MI)
If MI is COPY, try to combine it.
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
bool matchUndefStore(MachineInstr &MI)
Return true if a G_STORE instruction MI is storing an undef value.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchRedundantSExtInReg(MachineInstr &MI)
bool matchSextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine sext of trunc.
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo)
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent)
Match FPOWI if it's safe to extend it into a series of multiplications.
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo)
Do constant FP folding when opportunities are exposed after MIR building.
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI)
void applyCommuteBinOpOperands(MachineInstr &MI)
bool matchBinOpSameVal(MachineInstr &MI)
Optimize (x op x) -> x.
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineCopy(MachineInstr &MI)
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx)
Return true if a G_SELECT instruction MI has a constant comparison.
void eraseInst(MachineInstr &MI)
Erase MI.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchAddSubSameReg(MachineInstr &MI, Register &Src)
Transform G_ADD(x, G_SUB(y, x)) to y.
void applyRotateOutOfRange(MachineInstr &MI)
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchRotateOutOfRange(MachineInstr &MI)
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops)
Replace MI with a concat_vectors with Ops.
const TargetLowering & getTargetLowering() const
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
void applyPtrAddZero(MachineInstr &MI)
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
void setRegBank(Register Reg, const RegisterBank *RegBank)
Set the register bank of Reg.
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement)
void replaceInstWithConstant(MachineInstr &MI, int64_t C)
Replace an instruction with a G_CONSTANT with value C.
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
Match ashr (shl x, C), C -> sext_inreg (C)
bool tryCombineExtendingLoads(MachineInstr &MI)
If MI is extend that consumes the result of a load, try to combine it.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applySDivByConst(MachineInstr &MI)
bool matchUndefSelectCmp(MachineInstr &MI)
Return true if a G_SELECT instruction MI has an undef comparison.
void replaceInstWithUndef(MachineInstr &MI)
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantOr(MachineInstr &MI, Register &Replacement)
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is undef.
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void replaceInstWithFConstant(MachineInstr &MI, double C)
Replace an instruction with a G_FCONSTANT with value C.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2)
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
Fold (shift (shift base, x), y) -> (shift base (x+y))
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*MULO x, 0) -> 0 + no carry out.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement)
Delete MI and replace all of its uses with Replacement.
bool matchFunnelShiftToRotate(MachineInstr &MI)
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
Combine inverting a result of a compare into the opposite cond code.
void applyCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is known to be a power of 2.
void applyCombineCopy(MachineInstr &MI)
void applyCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
bool matchAnyExplicitUseIsUndef(MachineInstr &MI)
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
bool matchSextTruncSextLoad(MachineInstr &MI)
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
GISelKnownBits * KB
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
MachineInstr * buildSDivUsingMul(MachineInstr &MI)
Given an G_SDIV MI expressing a signed divide by constant, return an expression that implements it by...
void applySDivByPow2(MachineInstr &MI)
void applyFunnelShiftConstantModulo(MachineInstr &MI)
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool isPreLegalize() const
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo)
Match (and (load x), mask) -> zextload x.
bool matchConstantOp(const MachineOperand &MOP, int64_t C)
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ands.
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg)
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool matchConstantFPOp(const MachineOperand &MOP, double C)
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
Return true if MI is a G_ADD which can be simplified to a G_SUB.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Optimize memcpy intrinsics et al, e.g.
bool matchSelectSameVal(MachineInstr &MI)
Optimize (cond ? x : x) -> x.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst)
Transform fp_instr(cst) to constant result of the fp operation.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo)
Try to reassociate to reassociate operands of a commutative binop.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool tryEmitMemcpyInline(MachineInstr &MI)
Emit loads and stores that perform the given memcpy.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info)
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData)
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo)
Constant fold G_FMA/G_FMAD.
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent)
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
bool isLegal(const LegalityQuery &Query) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine selects.
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo)
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg)
Transform anyext(trunc(x)) to x.
void applySimplifyURemByPow2(MachineInstr &MI)
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
MachineRegisterInfo & MRI
void applyUMulHToLShr(MachineInstr &MI)
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo)
Match expression trees of the form.
bool matchShuffleToExtract(MachineInstr &MI)
bool matchUndefShuffleVectorMask(MachineInstr &MI)
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
Transform a multiply by a power-of-2 value to a left shift.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo)
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo)
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo)
Fold away a merge of an unmerge of the corresponding values.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI)
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx)
Checks if constant at ConstIdx is larger than MI 's bitwidth.
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
bool matchCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchDivByPow2(MachineInstr &MI, bool IsSigned)
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchUMulHToLShr(MachineInstr &MI)
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI dominates UseMI.
MachineInstr * buildUDivUsingMul(MachineInstr &MI)
Given an G_UDIV MI expressing a divide by constant, return an expression that implements it by multip...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg)
Transform zext(trunc(x)) to x.
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData)
bool matchNonNegZext(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine zext nneg to sext.
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false)
const LegalizerInfo * LI
bool matchZextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine zext of trunc.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
void applyShuffleToExtract(MachineInstr &MI)
MachineDominatorTree * MDT
bool matchSDivByConst(MachineInstr &MI)
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
const RegisterBankInfo * RBI
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
const TargetRegisterInfo * TRI
bool tryCombineShuffleVector(MachineInstr &MI)
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg)
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo)
GISelChangeObserver & Observer
bool matchCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Transform [asz]ext([asz]ext(x)) to [asz]ext x.
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Match sext_inreg(load p), imm -> sextload p.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ors.
void applyFunnelShiftToRotate(MachineInstr &MI)
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine addos.
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg)
Transform PtrToInt(IntToPtr(x)) to x.
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal)
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchCommuteConstantToRHS(MachineInstr &MI)
Match constant LHS ops that should be commuted.
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Replace MI with a series of instructions described in MatchInfo.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
MachineIRBuilder & Builder
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine select to integer min/max.
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (and x, n), k -> ubfx x, pos, width.
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate commutative binary operations like G_ADD.
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo)
Push a binary operator through a select on constants.
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is zero.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyUDivByPow2(MachineInstr &MI)
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
void applySextTruncSextLoad(MachineInstr &MI)
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
bool matchCommuteFPConstantToRHS(MachineInstr &MI)
Match constant LHS FP ops that should be commuted.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APFloat & getValue() const
Definition: Constants.h:313
const APFloat & getValueAPF() const
Definition: Constants.h:312
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This class represents a range of values.
Definition: ConstantRange.h:47
std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
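An illustrative use of these results together with ConstantRange::fromKnownBits, roughly the kind of reasoning an add-with-overflow combine can apply; KB, LHS and RHS are assumed placeholders for a known-bits analysis and the two addend registers:

ConstantRange LHSRange =
    ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
ConstantRange RHSRange =
    ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
switch (LHSRange.unsignedAddMayOverflow(RHSRange)) {
case ConstantRange::OverflowResult::NeverOverflows:
  // The carry-out is provably 0; the overflow flag can be replaced by a constant.
  break;
case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
  // The carry-out is provably 1.
  break;
default:
  break; // MayOverflow: nothing to fold.
}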
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isBigEndian() const
Definition: DataLayout.h:239
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Register getSrcReg() const
Represent a G_FCMP.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
void finishedChangingAllUsesOfReg()
All instructions reported as changing by changingAllUsesOfReg() have finished being changed.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg)
All the instructions using the given register are being changed.
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
KnownBits getKnownBits(Register R)
APInt getKnownZeroes(Register R)
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents a G_IMPLICIT_DEF.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents a sext.
Represents a trunc.
Represents a G_ZEXTLOAD.
Represents a zext.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:182
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition: LowLevelType.h:178
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
bool isLegalOrCustom(const LegalityQuery &Query) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LLVMContext & getContext() const
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ Src0.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildFDiv(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FDIV Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
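A short usage sketch of the builder methods listed above, assuming B is a MachineIRBuilder already positioned at an insertion point and X is an s32 register; the snippet clamps X to be non-negative:

LLT S32 = LLT::scalar(32);
LLT S1 = LLT::scalar(1);
auto Zero = B.buildConstant(S32, 0);
auto IsNeg = B.buildICmp(CmpInst::ICMP_SLT, S1, X, Zero); // x < 0 ?
auto Clamped = B.buildSelect(S32, IsNeg, Zero, X);        // 0 when negative, x otherwise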
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:396
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:733
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:391
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
void setRegClassOrRegBank(Register Reg, const RegClassOrRegBank &RCOrRB)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const
Given the generic extension instruction ExtMI, returns true if this extension is a likely candidate f...
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
operand_type_match m_Reg()
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(int64_t RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
Not(const Pred &P) -> Not< Pred >
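A hedged usage sketch of the mi_match helpers listed above; Reg is an assumed register and MRI the function's MachineRegisterInfo. Because the G_ADD matcher is commutative, the constant is accepted on either side:

Register Src;
APInt Cst;
if (mi_match(Reg, MRI, m_GAdd(m_Reg(Src), m_ICst(Cst)))) {
  // Src is bound to the non-constant addend, Cst to the constant value.
}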
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition: Utils.cpp:1433
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1974
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
static double log2(double V)
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:452
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:295
std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1393
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1546
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:727
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to have exactly one bit set when defined.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1516
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1528
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:479
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1561
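An illustrative call, checking that the scalar constant or every build-vector element feeding Reg is a non-zero power of two; the lambda and surrounding names are assumptions, not the in-tree code:

bool AllPow2 = matchUnaryPredicate(MRI, Reg, [](const Constant *C) {
  const auto *CI = dyn_cast<ConstantInt>(C);
  return CI && CI->getValue().isPowerOf2();
});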
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition: Utils.cpp:1593
std::function< void(MachineIRBuilder &)> BuildFnTy
std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:658
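A small sketch of constant folding at combine time, assuming both operands of a G_AND are G_CONSTANTs; DstReg, LHSReg and RHSReg are placeholders and the real combine would also erase the original instruction:

if (std::optional<APInt> Folded =
        ConstantFoldBinOp(TargetOpcode::G_AND, LHSReg, RHSReg, MRI))
  B.buildConstant(DstReg, *Folded);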
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1496
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition: Utils.cpp:201
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition: Utils.cpp:1426
std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:953
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition: Utils.cpp:440
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition: Utils.cpp:1618
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:426
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition: Utils.cpp:460
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:486
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1411
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:277
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition: Utils.h:224
Extended Value Type.
Definition: ValueTypes.h:34
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
Definition: KnownBits.cpp:488
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
static std::optional< bool > ne(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_NE result.
Definition: KnownBits.cpp:496
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:536
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:134
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:502
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:542
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:518
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:522
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:546
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:526
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:512
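A sketch of how these helpers are typically consumed; LHSKnown and RHSKnown are assumed to come from a known-bits analysis of the two compare operands:

if (std::optional<bool> Res = KnownBits::ult(LHSKnown, RHSKnown)) {
  // *Res is the guaranteed ICMP_ULT outcome, so the compare can be
  // replaced by a constant true/false value.
}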
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
MachineInstr * MI
const RegisterBank * Bank
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
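A minimal sketch of querying the magic numbers for an unsigned divide by a constant D (an APInt); the field names follow DivisionByConstantInfo.h and should be treated as an assumption here:

UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(D);
// Magics.Magic is the multiplier, Magics.PreShift/PostShift the shift amounts,
// and Magics.IsAdd selects the add-and-fixup expansion.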