LLVM 19.0.0git
CombinerHelper.cpp
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
56CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
57 MachineIRBuilder &B, bool IsPreLegalize,
58 GISelKnownBits *KB, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI)
60 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
61 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
62 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
63 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
64 (void)this->KB;
65}
66
67const TargetLowering &CombinerHelper::getTargetLowering() const {
68 return *Builder.getMF().getSubtarget().getTargetLowering();
69}
70
71/// \returns The little endian in-memory byte position of byte \p I in a
72/// \p ByteWidth bytes wide type.
73///
74/// E.g. Given a 4-byte type x, x[0] -> byte 0
75static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
76 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
77 return I;
78}
79
80/// Determines the LogBase2 value for a non-null input value using the
81/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
82static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
83 auto &MRI = *MIB.getMRI();
84 LLT Ty = MRI.getType(V);
85 auto Ctlz = MIB.buildCTLZ(Ty, V);
86 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
87 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
88}
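// Illustrative sketch of the MIR this helper emits (register names and the
// s32 type are arbitrary). E.g. for %v = 16, ctlz(16) = 27 and (32 - 1) - 27
// = 4 = log2(16):
//
//   %ctlz:_(s32) = G_CTLZ %v(s32)
//   %c31:_(s32) = G_CONSTANT i32 31
//   %log2:_(s32) = G_SUB %c31(s32), %ctlz(s32)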
89
90/// \returns The big endian in-memory byte position of byte \p I in a
91/// \p ByteWidth bytes wide type.
92///
93/// E.g. Given a 4-byte type x, x[0] -> byte 3
94static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
95 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
96 return ByteWidth - I - 1;
97}
98
99/// Given a map from byte offsets in memory to indices in a load/store,
100/// determine if that map corresponds to a little or big endian byte pattern.
101///
102/// \param MemOffset2Idx maps memory offsets to address offsets.
103/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
104///
105/// \returns true if the map corresponds to a big endian byte pattern, false if
106/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
107///
108/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
109/// are as follows:
110///
111/// AddrOffset Little endian Big endian
112/// 0 0 3
113/// 1 1 2
114/// 2 2 1
115/// 3 3 0
116static std::optional<bool>
117isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
118 int64_t LowestIdx) {
119 // Need at least two byte positions to decide on endianness.
120 unsigned Width = MemOffset2Idx.size();
121 if (Width < 2)
122 return std::nullopt;
123 bool BigEndian = true, LittleEndian = true;
124 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
125 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
126 if (MemOffsetAndIdx == MemOffset2Idx.end())
127 return std::nullopt;
128 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
129 assert(Idx >= 0 && "Expected non-negative byte offset?");
130 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
131 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
132 if (!BigEndian && !LittleEndian)
133 return std::nullopt;
134 }
135
136 assert((BigEndian != LittleEndian) &&
137 "Pattern cannot be both big and little endian!");
138 return BigEndian;
139}
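// For example (the map contents are illustrative), with a 4-byte wide pattern
// and LowestIdx = 0:
//   {0 -> 0, 1 -> 1, 2 -> 2, 3 -> 3} returns false (little endian)
//   {0 -> 3, 1 -> 2, 2 -> 1, 3 -> 0} returns true  (big endian)
//   {0 -> 1, 1 -> 0, 2 -> 2, 3 -> 3} returns std::nullopt (neither)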
140
141bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
142
143bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
144 assert(LI && "Must have LegalizerInfo to query isLegal!");
145 return LI->getAction(Query).Action == LegalizeActions::Legal;
146}
147
148bool CombinerHelper::isLegalOrBeforeLegalizer(
149 const LegalityQuery &Query) const {
150 return isPreLegalize() || isLegal(Query);
151}
152
153bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
154 if (!Ty.isVector())
155 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
156 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
157 if (isPreLegalize())
158 return true;
159 LLT EltTy = Ty.getElementType();
160 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
161 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
162}
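// For reference, a vector constant such as <2 x s32> <1, 2> is expected to be
// materialized as (illustrative MIR):
//   %c0:_(s32) = G_CONSTANT i32 1
//   %c1:_(s32) = G_CONSTANT i32 2
//   %v:_(<2 x s32>) = G_BUILD_VECTOR %c0(s32), %c1(s32)
// which is why both G_BUILD_VECTOR and G_CONSTANT are queried above.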
163
164void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
165 Register ToReg) const {
166 Observer.changingAllUsesOfReg(MRI, FromReg);
167
168 if (MRI.constrainRegAttrs(ToReg, FromReg))
169 MRI.replaceRegWith(FromReg, ToReg);
170 else
171 Builder.buildCopy(ToReg, FromReg);
172
173 Observer.finishedChangingAllUsesOfReg();
174}
175
176void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
177 MachineOperand &FromRegOp,
178 Register ToReg) const {
179 assert(FromRegOp.getParent() && "Expected an operand in an MI");
180 Observer.changingInstr(*FromRegOp.getParent());
181
182 FromRegOp.setReg(ToReg);
183
184 Observer.changedInstr(*FromRegOp.getParent());
185}
186
187void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
188 unsigned ToOpcode) const {
189 Observer.changingInstr(FromMI);
190
191 FromMI.setDesc(Builder.getTII().get(ToOpcode));
192
193 Observer.changedInstr(FromMI);
194}
195
196const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
197 return RBI->getRegBank(Reg, MRI, *TRI);
198}
199
200void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
201 if (RegBank)
202 MRI.setRegBank(Reg, *RegBank);
203}
204
205bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
206 if (matchCombineCopy(MI)) {
207 applyCombineCopy(MI);
208 return true;
209 }
210 return false;
211}
212bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
213 if (MI.getOpcode() != TargetOpcode::COPY)
214 return false;
215 Register DstReg = MI.getOperand(0).getReg();
216 Register SrcReg = MI.getOperand(1).getReg();
217 return canReplaceReg(DstReg, SrcReg, MRI);
218}
219void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
220 Register DstReg = MI.getOperand(0).getReg();
221 Register SrcReg = MI.getOperand(1).getReg();
222 MI.eraseFromParent();
223 replaceRegWith(MRI, DstReg, SrcReg);
224}
225
227 MachineInstr &MI, BuildFnTy &MatchInfo) {
228 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
229 Register DstOp = MI.getOperand(0).getReg();
230 Register OrigOp = MI.getOperand(1).getReg();
231
232 if (!MRI.hasOneNonDBGUse(OrigOp))
233 return false;
234
235 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
236 // Even if only a single operand of the PHI is not guaranteed non-poison,
237 // moving freeze() backwards across a PHI can cause optimization issues for
238 // other users of that operand.
239 //
240 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
241 // the source register is unprofitable because it makes the freeze() more
242 // strict than is necessary (it would affect the whole register instead of
243 // just the subreg being frozen).
244 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
245 return false;
246
247 if (canCreateUndefOrPoison(OrigOp, MRI,
248 /*ConsiderFlagsAndMetadata=*/false))
249 return false;
250
251 std::optional<MachineOperand> MaybePoisonOperand;
252 for (MachineOperand &Operand : OrigDef->uses()) {
253 if (!Operand.isReg())
254 return false;
255
256 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
257 continue;
258
259 if (!MaybePoisonOperand)
260 MaybePoisonOperand = Operand;
261 else {
262 // We have more than one maybe-poison operand. Moving the freeze is
263 // unsafe.
264 return false;
265 }
266 }
267
268 // Eliminate freeze if all operands are guaranteed non-poison.
269 if (!MaybePoisonOperand) {
270 MatchInfo = [=](MachineIRBuilder &B) {
271 Observer.changingInstr(*OrigDef);
272 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
273 Observer.changedInstr(*OrigDef);
274 B.buildCopy(DstOp, OrigOp);
275 };
276 return true;
277 }
278
279 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
280 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
281
282 MatchInfo = [=](MachineIRBuilder &B) mutable {
283 Observer.changingInstr(*OrigDef);
284 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
285 Observer.changedInstr(*OrigDef);
286 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
287 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
289 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
290 Freeze.getReg(0));
291 replaceRegWith(MRI, DstOp, OrigOp);
292 };
293 return true;
294}
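// Sketch of the rewrite this match enables (illustrative MIR; %a is assumed to
// be provably non-poison and %mp is the single maybe-poison operand):
//   %x:_(s32) = nsw G_ADD %a, %mp
//   %dst:_(s32) = G_FREEZE %x
// -->
//   %fz:_(s32) = G_FREEZE %mp
//   %x:_(s32) = G_ADD %a, %fz      ; poison-generating nsw flag dropped
//   (uses of %dst are rewritten to use %x and the original G_FREEZE goes away)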
295
298 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
299 "Invalid instruction");
300 bool IsUndef = true;
301 MachineInstr *Undef = nullptr;
302
303 // Walk over all the operands of concat vectors and check if they are
304 // build_vector themselves or undef.
305 // Then collect their operands in Ops.
306 for (const MachineOperand &MO : MI.uses()) {
307 Register Reg = MO.getReg();
308 MachineInstr *Def = MRI.getVRegDef(Reg);
309 assert(Def && "Operand not defined");
310 if (!MRI.hasOneNonDBGUse(Reg))
311 return false;
312 switch (Def->getOpcode()) {
313 case TargetOpcode::G_BUILD_VECTOR:
314 IsUndef = false;
315 // Remember the operands of the build_vector to fold
316 // them into the yet-to-build flattened concat vectors.
317 for (const MachineOperand &BuildVecMO : Def->uses())
318 Ops.push_back(BuildVecMO.getReg());
319 break;
320 case TargetOpcode::G_IMPLICIT_DEF: {
321 LLT OpType = MRI.getType(Reg);
322 // Keep one undef value for all the undef operands.
323 if (!Undef) {
324 Builder.setInsertPt(*MI.getParent(), MI);
325 Undef = Builder.buildUndef(OpType.getScalarType());
326 }
327 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
328 OpType.getScalarType() &&
329 "All undefs should have the same type");
330 // Break the undef vector in as many scalar elements as needed
331 // for the flattening.
332 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
333 EltIdx != EltEnd; ++EltIdx)
334 Ops.push_back(Undef->getOperand(0).getReg());
335 break;
336 }
337 default:
338 return false;
339 }
340 }
341
342 // Check if the combine is illegal
343 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
345 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
346 return false;
347 }
348
349 if (IsUndef)
350 Ops.clear();
351
352 return true;
353}
356 // We determined that the concat_vectors can be flattened.
357 // Generate the flattened build_vector.
358 Register DstReg = MI.getOperand(0).getReg();
359 Builder.setInsertPt(*MI.getParent(), MI);
360 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
361
362 // Note: IsUndef is sort of redundant. We could have determined it by
363 // checking that all Ops are undef. Alternatively, we could have
364 // generated a build_vector of undefs and relied on another combine to
365 // clean that up. For now, given we already gather this information
366 // in matchCombineConcatVectors, just save compile time and issue the
367 // right thing.
368 if (Ops.empty())
369 Builder.buildUndef(NewDstReg);
370 else
371 Builder.buildBuildVector(NewDstReg, Ops);
372 MI.eraseFromParent();
373 replaceRegWith(MRI, DstReg, NewDstReg);
374}
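// Example of the flattening performed above (illustrative MIR):
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32)
//   %b:_(<2 x s32>) = G_IMPLICIT_DEF
//   %r:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
// -->
//   %u:_(s32) = G_IMPLICIT_DEF
//   %r:_(<4 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32), %u(s32), %u(s32)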
375
378 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
379 auto ConcatMI1 =
380 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
381 auto ConcatMI2 =
382 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
383 if (!ConcatMI1 || !ConcatMI2)
384 return false;
385
386 // Check that the sources of the Concat instructions have the same type
387 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
388 MRI.getType(ConcatMI2->getSourceReg(0)))
389 return false;
390
391 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
392 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
393 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
394 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
395 // Check if the index takes a whole source register from G_CONCAT_VECTORS
396 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
397 if (Mask[i] == -1) {
398 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
399 if (i + j >= Mask.size())
400 return false;
401 if (Mask[i + j] != -1)
402 return false;
403 }
405 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
406 return false;
407 Ops.push_back(0);
408 } else if (Mask[i] % ConcatSrcNumElt == 0) {
409 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
410 if (i + j >= Mask.size())
411 return false;
412 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
413 return false;
414 }
415 // Retrieve the source register from its respective G_CONCAT_VECTORS
416 // instruction
417 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
418 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
419 } else {
420 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
421 ConcatMI1->getNumSources()));
422 }
423 } else {
424 return false;
425 }
426 }
427
429 {TargetOpcode::G_CONCAT_VECTORS,
430 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
431 return false;
432
433 return !Ops.empty();
434}
435
438 LLT SrcTy = MRI.getType(Ops[0]);
439 Register UndefReg = 0;
440
441 for (unsigned i = 0; i < Ops.size(); i++) {
442 if (Ops[i] == 0) {
443 if (UndefReg == 0)
444 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
445 Ops[i] = UndefReg;
446 }
447 }
448
449 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
450 MI.eraseFromParent();
451}
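// Example of the rewrite above, where the shuffle mask selects whole
// G_CONCAT_VECTORS sources (illustrative MIR):
//   %a:_(<8 x s16>) = G_CONCAT_VECTORS %p(<4 x s16>), %q(<4 x s16>)
//   %b:_(<8 x s16>) = G_CONCAT_VECTORS %r(<4 x s16>), %s(<4 x s16>)
//   %res:_(<8 x s16>) = G_SHUFFLE_VECTOR %a, %b, shufflemask(4,5,6,7,8,9,10,11)
// -->
//   %res:_(<8 x s16>) = G_CONCAT_VECTORS %q(<4 x s16>), %r(<4 x s16>)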
452
455 if (matchCombineShuffleVector(MI, Ops)) {
457 return true;
458 }
459 return false;
460}
461
464 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
465 "Invalid instruction kind");
466 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
467 Register Src1 = MI.getOperand(1).getReg();
468 LLT SrcType = MRI.getType(Src1);
469 // As bizarre as it may look, shuffle vector can actually produce
470 // scalar! This is because at the IR level a <1 x ty> shuffle
471 // vector is perfectly valid.
472 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
473 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
474
475 // If the resulting vector is smaller than the size of the source
476 // vectors being concatenated, we won't be able to replace the
477 // shuffle vector with a concat_vectors.
478 //
479 // Note: We may still be able to produce a concat_vectors fed by
480 // extract_vector_elt and so on. It is less clear that would
481 // be better though, so don't bother for now.
482 //
483 // If the destination is a scalar, the size of the sources doesn't
484 // matter. We will lower the shuffle to a plain copy. This will
485 // work only if the source and destination have the same size. But
486 // that's covered by the next condition.
487 //
488 // TODO: If the sizes of the source and destination don't match
489 // we could still emit an extract vector element in that case.
490 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
491 return false;
492
493 // Check that the shuffle mask can be broken evenly between the
494 // different sources.
495 if (DstNumElts % SrcNumElts != 0)
496 return false;
497
498 // Mask length is a multiple of the source vector length.
499 // Check if the shuffle is some kind of concatenation of the input
500 // vectors.
501 unsigned NumConcat = DstNumElts / SrcNumElts;
502 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
503 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
504 for (unsigned i = 0; i != DstNumElts; ++i) {
505 int Idx = Mask[i];
506 // Undef value.
507 if (Idx < 0)
508 continue;
509 // Ensure the indices in each SrcType sized piece are sequential and that
510 // the same source is used for the whole piece.
511 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
512 (ConcatSrcs[i / SrcNumElts] >= 0 &&
513 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
514 return false;
515 // Remember which source this index came from.
516 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
517 }
518
519 // The shuffle is concatenating multiple vectors together.
520 // Collect the different operands for that.
521 Register UndefReg;
522 Register Src2 = MI.getOperand(2).getReg();
523 for (auto Src : ConcatSrcs) {
524 if (Src < 0) {
525 if (!UndefReg) {
526 Builder.setInsertPt(*MI.getParent(), MI);
527 UndefReg = Builder.buildUndef(SrcType).getReg(0);
528 }
529 Ops.push_back(UndefReg);
530 } else if (Src == 0)
531 Ops.push_back(Src1);
532 else
533 Ops.push_back(Src2);
534 }
535 return true;
536}
537
539 const ArrayRef<Register> Ops) {
540 Register DstReg = MI.getOperand(0).getReg();
541 Builder.setInsertPt(*MI.getParent(), MI);
542 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
543
544 if (Ops.size() == 1)
545 Builder.buildCopy(NewDstReg, Ops[0]);
546 else
547 Builder.buildMergeLikeInstr(NewDstReg, Ops);
548
549 MI.eraseFromParent();
550 replaceRegWith(MRI, DstReg, NewDstReg);
551}
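// Example of the shuffle-to-concat rewrite (illustrative MIR):
//   %r:_(<4 x s32>) = G_SHUFFLE_VECTOR %v1(<2 x s32>), %v2(<2 x s32>),
//                                      shufflemask(0,1,2,3)
// -->
//   %r:_(<4 x s32>) = G_CONCAT_VECTORS %v1(<2 x s32>), %v2(<2 x s32>)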
552
554 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
555 "Invalid instruction kind");
556
557 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
558 return Mask.size() == 1;
559}
560
562 Register DstReg = MI.getOperand(0).getReg();
563 Builder.setInsertPt(*MI.getParent(), MI);
564
565 int I = MI.getOperand(3).getShuffleMask()[0];
566 Register Src1 = MI.getOperand(1).getReg();
567 LLT Src1Ty = MRI.getType(Src1);
568 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
569 Register SrcReg;
570 if (I >= Src1NumElts) {
571 SrcReg = MI.getOperand(2).getReg();
572 I -= Src1NumElts;
573 } else if (I >= 0)
574 SrcReg = Src1;
575
576 if (I < 0)
577 Builder.buildUndef(DstReg);
578 else if (!MRI.getType(SrcReg).isVector())
579 Builder.buildCopy(DstReg, SrcReg);
580 else
582
583 MI.eraseFromParent();
584}
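// Example of the single-element shuffle lowering above (illustrative MIR; the
// index type of the emitted constant is target dependent, s64 is just an
// example):
//   %r:_(s32) = G_SHUFFLE_VECTOR %v1(<2 x s32>), %v2(<2 x s32>), shufflemask(3)
// -->
//   %c1:_(s64) = G_CONSTANT i64 1
//   %r:_(s32) = G_EXTRACT_VECTOR_ELT %v2(<2 x s32>), %c1(s64)
// A negative (undef) mask element instead produces a G_IMPLICIT_DEF of %r.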
585
586namespace {
587
588/// Select a preference between two uses. CurrentUse is the current preference
589/// while the *ForCandidate arguments describe the candidate under consideration.
590PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
591 PreferredTuple &CurrentUse,
592 const LLT TyForCandidate,
593 unsigned OpcodeForCandidate,
594 MachineInstr *MIForCandidate) {
595 if (!CurrentUse.Ty.isValid()) {
596 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
597 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
598 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
599 return CurrentUse;
600 }
601
602 // We permit the extend to hoist through basic blocks but this is only
603 // sensible if the target has extending loads. If you end up lowering back
604 // into a load and extend during the legalizer then the end result is
605 // hoisting the extend up to the load.
606
607 // Prefer defined extensions to undefined extensions as these are more
608 // likely to reduce the number of instructions.
609 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
610 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
611 return CurrentUse;
612 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
613 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
614 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
615
616 // Prefer sign extensions to zero extensions as sign-extensions tend to be
617 // more expensive. Don't do this if the load is already a zero-extend load
618 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
619 // later.
620 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
621 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
622 OpcodeForCandidate == TargetOpcode::G_ZEXT)
623 return CurrentUse;
624 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
625 OpcodeForCandidate == TargetOpcode::G_SEXT)
626 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
627 }
628
629 // This is potentially target specific. We've chosen the largest type
630 // because G_TRUNC is usually free. One potential catch with this is that
631 // some targets have a reduced number of larger registers than smaller
632 // registers and this choice potentially increases the live-range for the
633 // larger value.
634 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
635 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
636 }
637 return CurrentUse;
638}
639
640/// Find a suitable place to insert some instructions and insert them. This
641/// function accounts for special cases like inserting before a PHI node.
642/// The current strategy for inserting before PHI's is to duplicate the
643/// instructions for each predecessor. However, while that's ok for G_TRUNC
644/// on most targets since it generally requires no code, other targets/cases may
645/// want to try harder to find a dominating block.
646static void InsertInsnsWithoutSideEffectsBeforeUse(
649 MachineOperand &UseMO)>
650 Inserter) {
651 MachineInstr &UseMI = *UseMO.getParent();
652
653 MachineBasicBlock *InsertBB = UseMI.getParent();
654
655 // If the use is a PHI then we want the predecessor block instead.
656 if (UseMI.isPHI()) {
657 MachineOperand *PredBB = std::next(&UseMO);
658 InsertBB = PredBB->getMBB();
659 }
660
661 // If the block is the same block as the def then we want to insert just after
662 // the def instead of at the start of the block.
663 if (InsertBB == DefMI.getParent()) {
665 Inserter(InsertBB, std::next(InsertPt), UseMO);
666 return;
667 }
668
669 // Otherwise we want the start of the BB
670 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
671}
672} // end anonymous namespace
673
675 PreferredTuple Preferred;
676 if (matchCombineExtendingLoads(MI, Preferred)) {
677 applyCombineExtendingLoads(MI, Preferred);
678 return true;
679 }
680 return false;
681}
682
683static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
684 unsigned CandidateLoadOpc;
685 switch (ExtOpc) {
686 case TargetOpcode::G_ANYEXT:
687 CandidateLoadOpc = TargetOpcode::G_LOAD;
688 break;
689 case TargetOpcode::G_SEXT:
690 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
691 break;
692 case TargetOpcode::G_ZEXT:
693 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
694 break;
695 default:
696 llvm_unreachable("Unexpected extend opc");
697 }
698 return CandidateLoadOpc;
699}
700
702 PreferredTuple &Preferred) {
703 // We match the loads and follow the uses to the extend instead of matching
704 // the extends and following the def to the load. This is because the load
705 // must remain in the same position for correctness (unless we also add code
706 // to find a safe place to sink it) whereas the extend is freely movable.
707 // It also prevents us from duplicating the load for the volatile case or just
708 // for performance.
709 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
710 if (!LoadMI)
711 return false;
712
713 Register LoadReg = LoadMI->getDstReg();
714
715 LLT LoadValueTy = MRI.getType(LoadReg);
716 if (!LoadValueTy.isScalar())
717 return false;
718
719 // Most architectures are going to legalize sub-byte (< s8) loads into at least
720 // a 1-byte load, and the MMOs can only describe memory accesses in multiples of bytes.
721 // If we try to perform extload combining on those, we can end up with
722 // %a(s8) = extload %ptr (load 1 byte from %ptr)
723 // ... which is an illegal extload instruction.
724 if (LoadValueTy.getSizeInBits() < 8)
725 return false;
726
727 // For non power-of-2 types, they will very likely be legalized into multiple
728 // loads. Don't bother trying to match them into extending loads.
729 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
730 return false;
731
732 // Find the preferred type aside from the any-extends (unless it's the only
733 // one) and non-extending ops. We'll emit an extending load to that type and
734 // emit a variant of (extend (trunc X)) for the others according to the
735 // relative type sizes. At the same time, pick an extend to use based on the
736 // extend involved in the chosen type.
737 unsigned PreferredOpcode =
738 isa<GLoad>(&MI)
739 ? TargetOpcode::G_ANYEXT
740 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
741 Preferred = {LLT(), PreferredOpcode, nullptr};
742 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
743 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
744 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
745 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
746 const auto &MMO = LoadMI->getMMO();
747 // Don't do anything for atomics.
748 if (MMO.isAtomic())
749 continue;
750 // Check for legality.
751 if (!isPreLegalize()) {
752 LegalityQuery::MemDesc MMDesc(MMO);
753 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
754 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
755 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
756 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
757 .Action != LegalizeActions::Legal)
758 continue;
759 }
760 Preferred = ChoosePreferredUse(MI, Preferred,
761 MRI.getType(UseMI.getOperand(0).getReg()),
762 UseMI.getOpcode(), &UseMI);
763 }
764 }
765
766 // There were no extends
767 if (!Preferred.MI)
768 return false;
769 // It should be impossible to choose an extend without selecting a different
770 // type since by definition the result of an extend is larger.
771 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
772
773 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
774 return true;
775}
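// The overall effect of this match/apply pair is, for example (illustrative
// MIR):
//   %v:_(s8) = G_LOAD %p(p0) :: (load (s8))
//   %e:_(s32) = G_SEXT %v(s8)
// -->
//   %e:_(s32) = G_SEXTLOAD %p(p0) :: (load (s8))
// with remaining uses of the old narrow value rewritten through a G_TRUNC of
// the widened result.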
776
778 PreferredTuple &Preferred) {
779 // Rewrite the load to the chosen extending load.
780 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
781
782 // Inserter to insert a truncate back to the original type at a given point
783 // with some basic CSE to limit truncate duplication to one per BB.
785 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
786 MachineBasicBlock::iterator InsertBefore,
787 MachineOperand &UseMO) {
788 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
789 if (PreviouslyEmitted) {
791 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
793 return;
794 }
795
796 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
797 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
798 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
799 EmittedInsns[InsertIntoBB] = NewMI;
800 replaceRegOpWith(MRI, UseMO, NewDstReg);
801 };
802
804 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
805 MI.setDesc(Builder.getTII().get(LoadOpc));
806
807 // Rewrite all the uses to fix up the types.
808 auto &LoadValue = MI.getOperand(0);
810 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
811 Uses.push_back(&UseMO);
812
813 for (auto *UseMO : Uses) {
814 MachineInstr *UseMI = UseMO->getParent();
815
816 // If the extend is compatible with the preferred extend then we should fix
817 // up the type and extend so that it uses the preferred use.
818 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
819 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
820 Register UseDstReg = UseMI->getOperand(0).getReg();
821 MachineOperand &UseSrcMO = UseMI->getOperand(1);
822 const LLT UseDstTy = MRI.getType(UseDstReg);
823 if (UseDstReg != ChosenDstReg) {
824 if (Preferred.Ty == UseDstTy) {
825 // If the use has the same type as the preferred use, then merge
826 // the vregs and erase the extend. For example:
827 // %1:_(s8) = G_LOAD ...
828 // %2:_(s32) = G_SEXT %1(s8)
829 // %3:_(s32) = G_ANYEXT %1(s8)
830 // ... = ... %3(s32)
831 // rewrites to:
832 // %2:_(s32) = G_SEXTLOAD ...
833 // ... = ... %2(s32)
834 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
836 UseMO->getParent()->eraseFromParent();
837 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
838 // If the preferred size is smaller, then keep the extend but extend
839 // from the result of the extending load. For example:
840 // %1:_(s8) = G_LOAD ...
841 // %2:_(s32) = G_SEXT %1(s8)
842 // %3:_(s64) = G_ANYEXT %1(s8)
843 // ... = ... %3(s64)
844 /// rewrites to:
845 // %2:_(s32) = G_SEXTLOAD ...
846 // %3:_(s64) = G_ANYEXT %2:_(s32)
847 // ... = ... %3(s64)
848 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
849 } else {
850 // If the preferred size is large, then insert a truncate. For
851 // example:
852 // %1:_(s8) = G_LOAD ...
853 // %2:_(s64) = G_SEXT %1(s8)
854 // %3:_(s32) = G_ZEXT %1(s8)
855 // ... = ... %3(s32)
856 /// rewrites to:
857 // %2:_(s64) = G_SEXTLOAD ...
858 // %4:_(s8) = G_TRUNC %2:_(s32)
859 // %3:_(s64) = G_ZEXT %2:_(s8)
860 // ... = ... %3(s64)
861 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
862 InsertTruncAt);
863 }
864 continue;
865 }
866 // The use is (one of) the uses of the preferred use we chose earlier.
867 // We're going to update the load to def this value later so just erase
868 // the old extend.
870 UseMO->getParent()->eraseFromParent();
871 continue;
872 }
873
874 // The use isn't an extend. Truncate back to the type we originally loaded.
875 // This is free on many targets.
876 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
877 }
878
879 MI.getOperand(0).setReg(ChosenDstReg);
881}
882
884 BuildFnTy &MatchInfo) {
885 assert(MI.getOpcode() == TargetOpcode::G_AND);
886
887 // If we have the following code:
888 // %mask = G_CONSTANT 255
889 // %ld = G_LOAD %ptr, (load s16)
890 // %and = G_AND %ld, %mask
891 //
892 // Try to fold it into
893 // %ld = G_ZEXTLOAD %ptr, (load s8)
894
895 Register Dst = MI.getOperand(0).getReg();
896 if (MRI.getType(Dst).isVector())
897 return false;
898
899 auto MaybeMask =
900 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
901 if (!MaybeMask)
902 return false;
903
904 APInt MaskVal = MaybeMask->Value;
905
906 if (!MaskVal.isMask())
907 return false;
908
909 Register SrcReg = MI.getOperand(1).getReg();
910 // Don't use getOpcodeDef() here since intermediate instructions may have
911 // multiple users.
912 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
913 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
914 return false;
915
916 Register LoadReg = LoadMI->getDstReg();
917 LLT RegTy = MRI.getType(LoadReg);
918 Register PtrReg = LoadMI->getPointerReg();
919 unsigned RegSize = RegTy.getSizeInBits();
920 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
921 unsigned MaskSizeBits = MaskVal.countr_one();
922
923 // The mask may not be larger than the in-memory type, as it might cover sign
924 // extended bits
925 if (MaskSizeBits > LoadSizeBits.getValue())
926 return false;
927
928 // If the mask covers the whole destination register, there's nothing to
929 // extend
930 if (MaskSizeBits >= RegSize)
931 return false;
932
933 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
934 // at least byte loads. Avoid creating such loads here
935 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
936 return false;
937
938 const MachineMemOperand &MMO = LoadMI->getMMO();
939 LegalityQuery::MemDesc MemDesc(MMO);
940
941 // Don't modify the memory access size if this is atomic/volatile, but we can
942 // still adjust the opcode to indicate the high bit behavior.
943 if (LoadMI->isSimple())
944 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
945 else if (LoadSizeBits.getValue() > MaskSizeBits ||
946 LoadSizeBits.getValue() == RegSize)
947 return false;
948
949 // TODO: Could check if it's legal with the reduced or original memory size.
951 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
952 return false;
953
954 MatchInfo = [=](MachineIRBuilder &B) {
955 B.setInstrAndDebugLoc(*LoadMI);
956 auto &MF = B.getMF();
957 auto PtrInfo = MMO.getPointerInfo();
958 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
959 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
960 LoadMI->eraseFromParent();
961 };
962 return true;
963}
964
966 const MachineInstr &UseMI) {
967 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
968 "shouldn't consider debug uses");
969 assert(DefMI.getParent() == UseMI.getParent());
970 if (&DefMI == &UseMI)
971 return true;
972 const MachineBasicBlock &MBB = *DefMI.getParent();
973 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
974 return &MI == &DefMI || &MI == &UseMI;
975 });
976 if (DefOrUse == MBB.end())
977 llvm_unreachable("Block must contain both DefMI and UseMI!");
978 return &*DefOrUse == &DefMI;
979}
980
982 const MachineInstr &UseMI) {
983 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
984 "shouldn't consider debug uses");
985 if (MDT)
986 return MDT->dominates(&DefMI, &UseMI);
987 else if (DefMI.getParent() != UseMI.getParent())
988 return false;
989
990 return isPredecessor(DefMI, UseMI);
991}
992
994 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
995 Register SrcReg = MI.getOperand(1).getReg();
996 Register LoadUser = SrcReg;
997
998 if (MRI.getType(SrcReg).isVector())
999 return false;
1000
1001 Register TruncSrc;
1002 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1003 LoadUser = TruncSrc;
1004
1005 uint64_t SizeInBits = MI.getOperand(2).getImm();
1006 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1007 // need any extend at all, just a truncate.
1008 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1009 // If truncating more than the original extended value, abort.
1010 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1011 if (TruncSrc &&
1012 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1013 return false;
1014 if (LoadSizeBits == SizeInBits)
1015 return true;
1016 }
1017 return false;
1018}
1019
1021 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1022 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1023 MI.eraseFromParent();
1024}
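// Example of the redundant sign extension being removed (illustrative MIR):
//   %ld:_(s32) = G_SEXTLOAD %p(p0) :: (load (s8))
//   %s:_(s32) = G_SEXT_INREG %ld, 8
// -->
//   %s:_(s32) = COPY %ld(s32)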
1025
1027 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1028 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1029
1030 Register DstReg = MI.getOperand(0).getReg();
1031 LLT RegTy = MRI.getType(DstReg);
1032
1033 // Only supports scalars for now.
1034 if (RegTy.isVector())
1035 return false;
1036
1037 Register SrcReg = MI.getOperand(1).getReg();
1038 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1039 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
1040 return false;
1041
1042 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1043
1044 // If the sign extend extends from a narrower width than the load's width,
1045 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1046 // Avoid widening the load at all.
1047 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1048
1049 // Don't generate G_SEXTLOADs with a < 1 byte width.
1050 if (NewSizeBits < 8)
1051 return false;
1052 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1053 // anyway for most targets.
1054 if (!isPowerOf2_32(NewSizeBits))
1055 return false;
1056
1057 const MachineMemOperand &MMO = LoadDef->getMMO();
1058 LegalityQuery::MemDesc MMDesc(MMO);
1059
1060 // Don't modify the memory access size if this is atomic/volatile, but we can
1061 // still adjust the opcode to indicate the high bit behavior.
1062 if (LoadDef->isSimple())
1063 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1064 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1065 return false;
1066
1067 // TODO: Could check if it's legal with the reduced or original memory size.
1068 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1069 {MRI.getType(LoadDef->getDstReg()),
1070 MRI.getType(LoadDef->getPointerReg())},
1071 {MMDesc}}))
1072 return false;
1073
1074 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1075 return true;
1076}
1077
1079 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1080 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1081 Register LoadReg;
1082 unsigned ScalarSizeBits;
1083 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1084 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1085
1086 // If we have the following:
1087 // %ld = G_LOAD %ptr, (load 2)
1088 // %ext = G_SEXT_INREG %ld, 8
1089 // ==>
1090 // %ld = G_SEXTLOAD %ptr (load 1)
1091
1092 auto &MMO = LoadDef->getMMO();
1093 Builder.setInstrAndDebugLoc(*LoadDef);
1094 auto &MF = Builder.getMF();
1095 auto PtrInfo = MMO.getPointerInfo();
1096 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1097 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1098 LoadDef->getPointerReg(), *NewMMO);
1099 MI.eraseFromParent();
1100}
1101
1102/// Return true if 'MI' is a load or a store that may fold its address
1103/// operand into the load/store addressing mode.
1104static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
1105 MachineRegisterInfo &MRI) {
1106 TargetLoweringBase::AddrMode AM;
1107 auto *MF = MI->getMF();
1108 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1109 if (!Addr)
1110 return false;
1111
1112 AM.HasBaseReg = true;
1113 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1114 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1115 else
1116 AM.Scale = 1; // [reg +/- reg]
1117
1118 return TLI.isLegalAddressingMode(
1119 MF->getDataLayout(), AM,
1120 getTypeForLLT(MI->getMMO().getMemoryType(),
1121 MF->getFunction().getContext()),
1122 MI->getMMO().getAddrSpace());
1123}
1124
1125static unsigned getIndexedOpc(unsigned LdStOpc) {
1126 switch (LdStOpc) {
1127 case TargetOpcode::G_LOAD:
1128 return TargetOpcode::G_INDEXED_LOAD;
1129 case TargetOpcode::G_STORE:
1130 return TargetOpcode::G_INDEXED_STORE;
1131 case TargetOpcode::G_ZEXTLOAD:
1132 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1133 case TargetOpcode::G_SEXTLOAD:
1134 return TargetOpcode::G_INDEXED_SEXTLOAD;
1135 default:
1136 llvm_unreachable("Unexpected opcode");
1137 }
1138}
1139
1140bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1141 // Check for legality.
1142 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1143 LLT Ty = MRI.getType(LdSt.getReg(0));
1144 LLT MemTy = LdSt.getMMO().getMemoryType();
1146 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1148 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1149 SmallVector<LLT> OpTys;
1150 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1151 OpTys = {PtrTy, Ty, Ty};
1152 else
1153 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1154
1155 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1156 return isLegal(Q);
1157}
1158
1160 "post-index-use-threshold", cl::Hidden, cl::init(32),
1161 cl::desc("Number of uses of a base pointer to check before it is no longer "
1162 "considered for post-indexing."));
1163
1164bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1165 Register &Base, Register &Offset,
1166 bool &RematOffset) {
1167 // We're looking for the following pattern, for either load or store:
1168 // %baseptr:_(p0) = ...
1169 // G_STORE %val(s64), %baseptr(p0)
1170 // %offset:_(s64) = G_CONSTANT i64 -256
1171 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1172 const auto &TLI = getTargetLowering();
1173
1174 Register Ptr = LdSt.getPointerReg();
1175 // If the store is the only use, don't bother.
1176 if (MRI.hasOneNonDBGUse(Ptr))
1177 return false;
1178
1179 if (!isIndexedLoadStoreLegal(LdSt))
1180 return false;
1181
1182 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1183 return false;
1184
1185 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1186 auto *PtrDef = MRI.getVRegDef(Ptr);
1187
1188 unsigned NumUsesChecked = 0;
1189 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1190 if (++NumUsesChecked > PostIndexUseThreshold)
1191 return false; // Try to avoid exploding compile time.
1192
1193 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1194 // The use itself might be dead. This can happen during combines if DCE
1195 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1196 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1197 continue;
1198
1199 // Check the user of this isn't the store, otherwise we'd generate an
1200 // indexed store defining its own use.
1201 if (StoredValDef == &Use)
1202 continue;
1203
1204 Offset = PtrAdd->getOffsetReg();
1205 if (!ForceLegalIndexing &&
1206 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1207 /*IsPre*/ false, MRI))
1208 continue;
1209
1210 // Make sure the offset calculation is before the potentially indexed op.
1211 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1212 RematOffset = false;
1213 if (!dominates(*OffsetDef, LdSt)) {
1214 // If the offset however is just a G_CONSTANT, we can always just
1215 // rematerialize it where we need it.
1216 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1217 continue;
1218 RematOffset = true;
1219 }
1220
1221 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1222 if (&BasePtrUse == PtrDef)
1223 continue;
1224
1225 // If the user is a later load/store that can be post-indexed, then don't
1226 // combine this one.
1227 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1228 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1229 dominates(LdSt, *BasePtrLdSt) &&
1230 isIndexedLoadStoreLegal(*BasePtrLdSt))
1231 return false;
1232
1233 // Now we're looking for the key G_PTR_ADD instruction, which contains
1234 // the offset add that we want to fold.
1235 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1236 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1237 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1238 // If the use is in a different block, then we may produce worse code
1239 // due to the extra register pressure.
1240 if (BaseUseUse.getParent() != LdSt.getParent())
1241 return false;
1242
1243 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1244 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1245 return false;
1246 }
1247 if (!dominates(LdSt, BasePtrUse))
1248 return false; // All uses must be dominated by the load/store.
1249 }
1250 }
1251
1252 Addr = PtrAdd->getReg(0);
1253 Base = PtrAdd->getBaseReg();
1254 return true;
1255 }
1256
1257 return false;
1258}
1259
1260bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1261 Register &Base, Register &Offset) {
1262 auto &MF = *LdSt.getParent()->getParent();
1263 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1264
1265 Addr = LdSt.getPointerReg();
1268 return false;
1269
1270 if (!ForceLegalIndexing &&
1271 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1272 return false;
1273
1274 if (!isIndexedLoadStoreLegal(LdSt))
1275 return false;
1276
1278 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1279 return false;
1280
1281 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1282 // Would require a copy.
1283 if (Base == St->getValueReg())
1284 return false;
1285
1286 // We're expecting one use of Addr in MI, but it could also be the
1287 // value stored, which isn't actually dominated by the instruction.
1288 if (St->getValueReg() == Addr)
1289 return false;
1290 }
1291
1292 // Avoid increasing cross-block register pressure.
1293 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1294 if (AddrUse.getParent() != LdSt.getParent())
1295 return false;
1296
1297 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1298 // That might allow us to end base's liveness here by adjusting the constant.
1299 bool RealUse = false;
1300 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1301 if (!dominates(LdSt, AddrUse))
1302 return false; // All uses must be dominated by the load/store.
1303
1304 // If Ptr may be folded into the addressing mode of another use, then it's
1305 // not profitable to do this transformation.
1306 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1307 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1308 RealUse = true;
1309 } else {
1310 RealUse = true;
1311 }
1312 }
1313 return RealUse;
1314}
1315
1317 BuildFnTy &MatchInfo) {
1318 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1319
1320 // Check if there is a load that defines the vector being extracted from.
1321 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1322 if (!LoadMI)
1323 return false;
1324
1325 Register Vector = MI.getOperand(1).getReg();
1326 LLT VecEltTy = MRI.getType(Vector).getElementType();
1327
1328 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1329
1330 // Checking whether we should reduce the load width.
1332 return false;
1333
1334 // Check if the defining load is simple.
1335 if (!LoadMI->isSimple())
1336 return false;
1337
1338 // If the vector element type is not a multiple of a byte then we are unable
1339 // to correctly compute an address to load only the extracted element as a
1340 // scalar.
1341 if (!VecEltTy.isByteSized())
1342 return false;
1343
1344 // Check for load fold barriers between the extraction and the load.
1345 if (MI.getParent() != LoadMI->getParent())
1346 return false;
1347 const unsigned MaxIter = 20;
1348 unsigned Iter = 0;
1349 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1350 if (II->isLoadFoldBarrier())
1351 return false;
1352 if (Iter++ == MaxIter)
1353 return false;
1354 }
1355
1356 // Check if the new load that we are going to create is legal
1357 // if we are in the post-legalization phase.
1358 MachineMemOperand MMO = LoadMI->getMMO();
1359 Align Alignment = MMO.getAlign();
1360 MachinePointerInfo PtrInfo;
1362
1363 // Finding the appropriate PtrInfo if offset is a known constant.
1364 // This is required to create the memory operand for the narrowed load.
1365 // This machine memory operand object helps us infer about legality
1366 // before we proceed to combine the instruction.
1367 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1368 int Elt = CVal->getZExtValue();
1369 // FIXME: should be (ABI size)*Elt.
1370 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1371 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1372 } else {
1373 // Discard the pointer info except the address space because the memory
1374 // operand can't represent this new access since the offset is variable.
1375 Offset = VecEltTy.getSizeInBits() / 8;
1377 }
1378
1379 Alignment = commonAlignment(Alignment, Offset);
1380
1381 Register VecPtr = LoadMI->getPointerReg();
1382 LLT PtrTy = MRI.getType(VecPtr);
1383
1384 MachineFunction &MF = *MI.getMF();
1385 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1386
1387 LegalityQuery::MemDesc MMDesc(*NewMMO);
1388
1389 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1390
1392 return false;
1393
1394 // Load must be allowed and fast on the target.
1396 auto &DL = MF.getDataLayout();
1397 unsigned Fast = 0;
1398 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1399 &Fast) ||
1400 !Fast)
1401 return false;
1402
1403 Register Result = MI.getOperand(0).getReg();
1404 Register Index = MI.getOperand(2).getReg();
1405
1406 MatchInfo = [=](MachineIRBuilder &B) {
1407 GISelObserverWrapper DummyObserver;
1408 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1409 // Get a pointer to the vector element.
1410 Register finalPtr = Helper.getVectorElementPointer(
1411 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1412 Index);
1413 // New G_LOAD instruction.
1414 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1415 // Remove original GLOAD instruction.
1416 LoadMI->eraseFromParent();
1417 };
1418
1419 return true;
1420}
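// Sketch of the overall transform (illustrative MIR; the pointer arithmetic is
// produced by LegalizerHelper::getVectorElementPointer):
//   %v:_(<4 x s32>) = G_LOAD %p(p0) :: (load (<4 x s32>))
//   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v(<4 x s32>), %idx(s64)
// -->
//   %off:_(s64) = ... %idx scaled by the element size ...
//   %ep:_(p0) = G_PTR_ADD %p, %off(s64)
//   %e:_(s32) = G_LOAD %ep(p0) :: (load (s32))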
1421
1424 auto &LdSt = cast<GLoadStore>(MI);
1425
1426 if (LdSt.isAtomic())
1427 return false;
1428
1429 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1430 MatchInfo.Offset);
1431 if (!MatchInfo.IsPre &&
1432 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1433 MatchInfo.Offset, MatchInfo.RematOffset))
1434 return false;
1435
1436 return true;
1437}
1438
1441 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1442 unsigned Opcode = MI.getOpcode();
1443 bool IsStore = Opcode == TargetOpcode::G_STORE;
1444 unsigned NewOpcode = getIndexedOpc(Opcode);
1445
1446 // If the offset constant didn't happen to dominate the load/store, we can
1447 // just clone it as needed.
1448 if (MatchInfo.RematOffset) {
1449 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1450 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1451 *OldCst->getOperand(1).getCImm());
1452 MatchInfo.Offset = NewCst.getReg(0);
1453 }
1454
1455 auto MIB = Builder.buildInstr(NewOpcode);
1456 if (IsStore) {
1457 MIB.addDef(MatchInfo.Addr);
1458 MIB.addUse(MI.getOperand(0).getReg());
1459 } else {
1460 MIB.addDef(MI.getOperand(0).getReg());
1461 MIB.addDef(MatchInfo.Addr);
1462 }
1463
1464 MIB.addUse(MatchInfo.Base);
1465 MIB.addUse(MatchInfo.Offset);
1466 MIB.addImm(MatchInfo.IsPre);
1467 MIB->cloneMemRefs(*MI.getMF(), MI);
1468 MI.eraseFromParent();
1469 AddrDef.eraseFromParent();
1470
1471 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1472}
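// Example of the post-indexed form produced here (illustrative MIR; the final
// immediate operand is IsPre, so 0 means post-indexed):
//   G_STORE %val(s64), %base(p0)
//   %off:_(s64) = G_CONSTANT i64 -256
//   %new:_(p0) = G_PTR_ADD %base, %off(s64)
// -->
//   %new:_(p0) = G_INDEXED_STORE %val(s64), %base(p0), %off(s64), 0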
1473
1475 MachineInstr *&OtherMI) {
1476 unsigned Opcode = MI.getOpcode();
1477 bool IsDiv, IsSigned;
1478
1479 switch (Opcode) {
1480 default:
1481 llvm_unreachable("Unexpected opcode!");
1482 case TargetOpcode::G_SDIV:
1483 case TargetOpcode::G_UDIV: {
1484 IsDiv = true;
1485 IsSigned = Opcode == TargetOpcode::G_SDIV;
1486 break;
1487 }
1488 case TargetOpcode::G_SREM:
1489 case TargetOpcode::G_UREM: {
1490 IsDiv = false;
1491 IsSigned = Opcode == TargetOpcode::G_SREM;
1492 break;
1493 }
1494 }
1495
1496 Register Src1 = MI.getOperand(1).getReg();
1497 unsigned DivOpcode, RemOpcode, DivremOpcode;
1498 if (IsSigned) {
1499 DivOpcode = TargetOpcode::G_SDIV;
1500 RemOpcode = TargetOpcode::G_SREM;
1501 DivremOpcode = TargetOpcode::G_SDIVREM;
1502 } else {
1503 DivOpcode = TargetOpcode::G_UDIV;
1504 RemOpcode = TargetOpcode::G_UREM;
1505 DivremOpcode = TargetOpcode::G_UDIVREM;
1506 }
1507
1508 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1509 return false;
1510
1511 // Combine:
1512 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1513 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1514 // into:
1515 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1516
1517 // Combine:
1518 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1519 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1520 // into:
1521 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1522
1523 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1524 if (MI.getParent() == UseMI.getParent() &&
1525 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1526 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1527 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1528 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1529 OtherMI = &UseMI;
1530 return true;
1531 }
1532 }
1533
1534 return false;
1535}
1536
1538 MachineInstr *&OtherMI) {
1539 unsigned Opcode = MI.getOpcode();
1540 assert(OtherMI && "OtherMI shouldn't be empty.");
1541
1542 Register DestDivReg, DestRemReg;
1543 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1544 DestDivReg = MI.getOperand(0).getReg();
1545 DestRemReg = OtherMI->getOperand(0).getReg();
1546 } else {
1547 DestDivReg = OtherMI->getOperand(0).getReg();
1548 DestRemReg = MI.getOperand(0).getReg();
1549 }
1550
1551 bool IsSigned =
1552 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1553
1554 // Check which instruction is first in the block so we don't break def-use
1555 // deps by "moving" the instruction incorrectly. Also keep track of which
1556 // instruction is first so we pick its operands, avoiding use-before-def
1557 // bugs.
1558 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1559 Builder.setInstrAndDebugLoc(*FirstInst);
1560
1561 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1562 : TargetOpcode::G_UDIVREM,
1563 {DestDivReg, DestRemReg},
1564 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1565 MI.eraseFromParent();
1566 OtherMI->eraseFromParent();
1567}
1568
1570 MachineInstr *&BrCond) {
1571 assert(MI.getOpcode() == TargetOpcode::G_BR);
1572
1573 // Try to match the following:
1574 // bb1:
1575 // G_BRCOND %c1, %bb2
1576 // G_BR %bb3
1577 // bb2:
1578 // ...
1579 // bb3:
1580
1581 // The above pattern does not have a fall through to the successor bb2, always
1582 // resulting in a branch no matter which path is taken. Here we try to find
1583 // and replace that pattern with a conditional branch to bb3 and otherwise
1584 // a fallthrough to bb2. This is generally better for branch predictors.
1585
1586 MachineBasicBlock *MBB = MI.getParent();
1588 if (BrIt == MBB->begin())
1589 return false;
1590 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1591
1592 BrCond = &*std::prev(BrIt);
1593 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1594 return false;
1595
1596 // Check that the next block is the conditional branch target. Also make sure
1597 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1598 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1599 return BrCondTarget != MI.getOperand(0).getMBB() &&
1600 MBB->isLayoutSuccessor(BrCondTarget);
1601}
1602
1604 MachineInstr *&BrCond) {
1605 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1607 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1608 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1609 // this to i1 only since we might not know for sure what kind of
1610 // compare generated the condition value.
1611 auto True = Builder.buildConstant(
1612 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1613 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1614
1615 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1617 MI.getOperand(0).setMBB(FallthroughBB);
1619
1620 // Change the conditional branch to use the inverted condition and
1621 // new target block.
1622 Observer.changingInstr(*BrCond);
1623 BrCond->getOperand(0).setReg(Xor.getReg(0));
1624 BrCond->getOperand(1).setMBB(BrTarget);
1625 Observer.changedInstr(*BrCond);
1626}
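// Example of the branch inversion above (illustrative MIR; bb.2 is the layout
// successor of bb.1):
//   bb.1:
//     G_BRCOND %c(s1), %bb.2
//     G_BR %bb.3
// -->
//   bb.1:
//     %true:_(s1) = G_CONSTANT i1 1
//     %inv:_(s1) = G_XOR %c(s1), %true(s1)
//     G_BRCOND %inv(s1), %bb.3
//     G_BR %bb.2          ; now a fallthrough to the layout successor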
1627
1628
1630 MachineIRBuilder HelperBuilder(MI);
1631 GISelObserverWrapper DummyObserver;
1632 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1633 return Helper.lowerMemcpyInline(MI) ==
1635}
1636
1638 MachineIRBuilder HelperBuilder(MI);
1639 GISelObserverWrapper DummyObserver;
1640 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1641 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1643}
1644
1646 const MachineRegisterInfo &MRI,
1647 const APFloat &Val) {
1648 APFloat Result(Val);
1649 switch (MI.getOpcode()) {
1650 default:
1651 llvm_unreachable("Unexpected opcode!");
1652 case TargetOpcode::G_FNEG: {
1653 Result.changeSign();
1654 return Result;
1655 }
1656 case TargetOpcode::G_FABS: {
1657 Result.clearSign();
1658 return Result;
1659 }
1660 case TargetOpcode::G_FPTRUNC: {
1661 bool Unused;
1662 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1664 &Unused);
1665 return Result;
1666 }
1667 case TargetOpcode::G_FSQRT: {
1668 bool Unused;
1670 &Unused);
1671 Result = APFloat(sqrt(Result.convertToDouble()));
1672 break;
1673 }
1674 case TargetOpcode::G_FLOG2: {
1675 bool Unused;
1677 &Unused);
1678 Result = APFloat(log2(Result.convertToDouble()));
1679 break;
1680 }
1681 }
1682 // Convert `APFloat` to the appropriate IEEE type depending on `DstTy`. Otherwise,
1683 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
1684 // `G_FLOG2` reach here.
1685 bool Unused;
1686 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1687 return Result;
1688}
1689
1691 const ConstantFP *Cst) {
1692 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1693 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1694 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1695 MI.eraseFromParent();
1696}
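// Constant-folding example for the helpers above (illustrative MIR):
//   %c:_(s32) = G_FCONSTANT float 4.000000e+00
//   %r:_(s32) = G_FSQRT %c(s32)
// -->
//   %r:_(s32) = G_FCONSTANT float 2.000000e+00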
1697
1699 PtrAddChain &MatchInfo) {
1700 // We're trying to match the following pattern:
1701 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1702 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1703 // -->
1704 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1705
1706 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1707 return false;
1708
1709 Register Add2 = MI.getOperand(1).getReg();
1710 Register Imm1 = MI.getOperand(2).getReg();
1711 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1712 if (!MaybeImmVal)
1713 return false;
1714
1715 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1716 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1717 return false;
1718
1719 Register Base = Add2Def->getOperand(1).getReg();
1720 Register Imm2 = Add2Def->getOperand(2).getReg();
1721 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1722 if (!MaybeImm2Val)
1723 return false;
1724
1725 // Check if the new combined immediate forms an illegal addressing mode.
1726 // Do not combine if it was legal before but would get illegal.
1727 // To do so, we need to find a load/store user of the pointer to get
1728 // the access type.
1729 Type *AccessTy = nullptr;
1730 auto &MF = *MI.getMF();
1731 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1732 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1733 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1734 MF.getFunction().getContext());
1735 break;
1736 }
1737 }
1739 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1740 AMNew.BaseOffs = CombinedImm.getSExtValue();
1741 if (AccessTy) {
1742 AMNew.HasBaseReg = true;
1743 TargetLoweringBase::AddrMode AMOld;
1744 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1745 AMOld.HasBaseReg = true;
1746 unsigned AS = MRI.getType(Add2).getAddressSpace();
1747 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1748 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1749 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1750 return false;
1751 }
1752
1753 // Pass the combined immediate to the apply function.
1754 MatchInfo.Imm = AMNew.BaseOffs;
1755 MatchInfo.Base = Base;
1756 MatchInfo.Bank = getRegBank(Imm2);
1757 return true;
1758}
1759
1760void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1761 PtrAddChain &MatchInfo) {
1762 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1763 MachineIRBuilder MIB(MI);
1764 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1765 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1766 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1767 Observer.changingInstr(MI);
1768 MI.getOperand(1).setReg(MatchInfo.Base);
1769 MI.getOperand(2).setReg(NewOffset.getReg(0));
1770 Observer.changedInstr(MI);
1771}
1772
1773bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1774 RegisterImmPair &MatchInfo) {
1775 // We're trying to match the following pattern with any of
1776 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1777 // %t1 = SHIFT %base, G_CONSTANT imm1
1778 // %root = SHIFT %t1, G_CONSTANT imm2
1779 // -->
1780 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1781
1782 unsigned Opcode = MI.getOpcode();
1783 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1784 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1785 Opcode == TargetOpcode::G_USHLSAT) &&
1786 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1787
1788 Register Shl2 = MI.getOperand(1).getReg();
1789 Register Imm1 = MI.getOperand(2).getReg();
1790 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1791 if (!MaybeImmVal)
1792 return false;
1793
1794 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1795 if (Shl2Def->getOpcode() != Opcode)
1796 return false;
1797
1798 Register Base = Shl2Def->getOperand(1).getReg();
1799 Register Imm2 = Shl2Def->getOperand(2).getReg();
1800 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1801 if (!MaybeImm2Val)
1802 return false;
1803
1804 // Pass the combined immediate to the apply function.
1805 MatchInfo.Imm =
1806 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1807 MatchInfo.Reg = Base;
1808
1809 // There is no simple replacement for a saturating unsigned left shift that
1810 // exceeds the scalar size.
1811 if (Opcode == TargetOpcode::G_USHLSAT &&
1812 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1813 return false;
1814
1815 return true;
1816}
1817
1818void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1819 RegisterImmPair &MatchInfo) {
1820 unsigned Opcode = MI.getOpcode();
1821 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1822 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1823 Opcode == TargetOpcode::G_USHLSAT) &&
1824 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1825
1826 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1827 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1828 auto Imm = MatchInfo.Imm;
1829
1830 if (Imm >= ScalarSizeInBits) {
1831 // Any logical shift that exceeds scalar size will produce zero.
1832 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1833 Builder.buildConstant(MI.getOperand(0), 0);
1834 MI.eraseFromParent();
1835 return;
1836 }
1837 // Arithmetic shift and saturating signed left shift have no effect beyond
1838 // scalar size.
1839 Imm = ScalarSizeInBits - 1;
1840 }
1841
1842 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1843 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1844 Observer.changingInstr(MI);
1845 MI.getOperand(1).setReg(MatchInfo.Reg);
1846 MI.getOperand(2).setReg(NewImm);
1847 Observer.changedInstr(MI);
1848}
1849
1850bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1851 ShiftOfShiftedLogic &MatchInfo) {
1852 // We're trying to match the following pattern with any of
1853 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1854 // with any of G_AND/G_OR/G_XOR logic instructions.
1855 // %t1 = SHIFT %X, G_CONSTANT C0
1856 // %t2 = LOGIC %t1, %Y
1857 // %root = SHIFT %t2, G_CONSTANT C1
1858 // -->
1859 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1860 // %t4 = SHIFT %Y, G_CONSTANT C1
1861 // %root = LOGIC %t3, %t4
1862 unsigned ShiftOpcode = MI.getOpcode();
1863 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1864 ShiftOpcode == TargetOpcode::G_ASHR ||
1865 ShiftOpcode == TargetOpcode::G_LSHR ||
1866 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1867 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1868 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1869
1870 // Match a one-use bitwise logic op.
1871 Register LogicDest = MI.getOperand(1).getReg();
1872 if (!MRI.hasOneNonDBGUse(LogicDest))
1873 return false;
1874
1875 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1876 unsigned LogicOpcode = LogicMI->getOpcode();
1877 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1878 LogicOpcode != TargetOpcode::G_XOR)
1879 return false;
1880
1881 // Find a matching one-use shift by constant.
1882 const Register C1 = MI.getOperand(2).getReg();
1883 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1884 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1885 return false;
1886
1887 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1888
1889 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1890 // The shift should match the previous one and have a single use.
1891 if (MI->getOpcode() != ShiftOpcode ||
1892 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1893 return false;
1894
1895 // Must be a constant.
1896 auto MaybeImmVal =
1897 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1898 if (!MaybeImmVal)
1899 return false;
1900
1901 ShiftVal = MaybeImmVal->Value.getSExtValue();
1902 return true;
1903 };
1904
1905 // Logic ops are commutative, so check each operand for a match.
1906 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1907 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1908 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1909 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1910 uint64_t C0Val;
1911
1912 if (matchFirstShift(LogicMIOp1, C0Val)) {
1913 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1914 MatchInfo.Shift2 = LogicMIOp1;
1915 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1916 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1917 MatchInfo.Shift2 = LogicMIOp2;
1918 } else
1919 return false;
1920
1921 MatchInfo.ValSum = C0Val + C1Val;
1922
1923 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1924 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1925 return false;
1926
1927 MatchInfo.Logic = LogicMI;
1928 return true;
1929}
1930
1931void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1932 ShiftOfShiftedLogic &MatchInfo) {
1933 unsigned Opcode = MI.getOpcode();
1934 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1935 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1936 Opcode == TargetOpcode::G_SSHLSAT) &&
1937 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1938
1939 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1940 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1941
1942 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1943
1944 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1945 Register Shift1 =
1946 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1947
1948 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
1949 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
1950 // shift1 when building shift2. If we erased MatchInfo.Shift2 at the end, we
1951 // would actually remove the old shift1 and cause a crash later. So erase it
1952 // earlier to avoid the crash.
1953 MatchInfo.Shift2->eraseFromParent();
1954
1955 Register Shift2Const = MI.getOperand(2).getReg();
1956 Register Shift2 = Builder
1957 .buildInstr(Opcode, {DestType},
1958 {MatchInfo.LogicNonShiftReg, Shift2Const})
1959 .getReg(0);
1960
1961 Register Dest = MI.getOperand(0).getReg();
1962 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1963
1964 // This was one use so it's safe to remove it.
1965 MatchInfo.Logic->eraseFromParent();
1966
1967 MI.eraseFromParent();
1968}
1969
1970bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
1971 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1972 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1973 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
1974 auto &Shl = cast<GenericMachineInstr>(MI);
1975 Register DstReg = Shl.getReg(0);
1976 Register SrcReg = Shl.getReg(1);
1977 Register ShiftReg = Shl.getReg(2);
1978 Register X, C1;
1979
1980 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1981 return false;
1982
1983 if (!mi_match(SrcReg, MRI,
1984 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1985 m_GOr(m_Reg(X), m_Reg(C1))))))
1986 return false;
1987
1988 APInt C1Val, C2Val;
1989 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
1990 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
1991 return false;
1992
1993 auto *SrcDef = MRI.getVRegDef(SrcReg);
1994 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
1995 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
1996 LLT SrcTy = MRI.getType(SrcReg);
1997 MatchInfo = [=](MachineIRBuilder &B) {
1998 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
1999 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2000 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2001 };
2002 return true;
2003}
2004
2005bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
2006 unsigned &ShiftVal) {
2007 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
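// Illustrative sketch of this combine (generic MIR, made-up register names,
// assuming an s32 type; not taken from this file):
//   %c:_(s32) = G_CONSTANT i32 8
//   %d:_(s32) = G_MUL %x, %c
// -->
//   %k:_(s32) = G_CONSTANT i32 3
//   %d:_(s32) = G_SHL %x, %k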
2008 auto MaybeImmVal =
2009 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2010 if (!MaybeImmVal)
2011 return false;
2012
2013 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2014 return (static_cast<int32_t>(ShiftVal) != -1);
2015}
2016
2017void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
2018 unsigned &ShiftVal) {
2019 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2020 MachineIRBuilder MIB(MI);
2021 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2022 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2023 Observer.changingInstr(MI);
2024 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2025 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2026 Observer.changedInstr(MI);
2027}
2028
2029// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
2030bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
2031 RegisterImmPair &MatchData) {
2032 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
2033 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2034 return false;
2035
2036 Register LHS = MI.getOperand(1).getReg();
2037
2038 Register ExtSrc;
2039 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2040 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2041 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2042 return false;
2043
2044 Register RHS = MI.getOperand(2).getReg();
2045 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2046 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2047 if (!MaybeShiftAmtVal)
2048 return false;
2049
2050 if (LI) {
2051 LLT SrcTy = MRI.getType(ExtSrc);
2052
2053 // We only really care about the legality of the shifted value. We can
2054 // pick any type for the constant shift amount, so ask the target what to
2055 // use. Otherwise we would have to guess and hope it is reported as legal.
2056 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2057 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2058 return false;
2059 }
2060
2061 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2062 MatchData.Reg = ExtSrc;
2063 MatchData.Imm = ShiftAmt;
2064
2065 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
2066 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2067 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2068}
2069
2070void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
2071 const RegisterImmPair &MatchData) {
2072 Register ExtSrcReg = MatchData.Reg;
2073 int64_t ShiftAmtVal = MatchData.Imm;
2074
2075 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2076 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2077 auto NarrowShift =
2078 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2079 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2080 MI.eraseFromParent();
2081}
2082
2083bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2084 Register &MatchInfo) {
2085 GMerge &Merge = cast<GMerge>(MI);
2086 SmallVector<Register, 16> MergedValues;
2087 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2088 MergedValues.emplace_back(Merge.getSourceReg(I));
2089
2090 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2091 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2092 return false;
2093
2094 for (unsigned I = 0; I < MergedValues.size(); ++I)
2095 if (MergedValues[I] != Unmerge->getReg(I))
2096 return false;
2097
2098 MatchInfo = Unmerge->getSourceReg();
2099 return true;
2100}
2101
2102static Register peekThroughBitcast(Register Reg,
2103 const MachineRegisterInfo &MRI) {
2104 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2105 ;
2106
2107 return Reg;
2108}
2109
2110bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2111 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2112 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2113 "Expected an unmerge");
2114 auto &Unmerge = cast<GUnmerge>(MI);
2115 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2116
2117 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2118 if (!SrcInstr)
2119 return false;
2120
2121 // Check the source type of the merge.
2122 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2123 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2124 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2125 if (SrcMergeTy != Dst0Ty && !SameSize)
2126 return false;
2127 // They are the same now (modulo a bitcast).
2128 // We can collect all the src registers.
2129 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2130 Operands.push_back(SrcInstr->getSourceReg(Idx));
2131 return true;
2132}
2133
2134void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2135 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2136 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2137 "Expected an unmerge");
2138 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2139 "Not enough operands to replace all defs");
2140 unsigned NumElems = MI.getNumOperands() - 1;
2141
2142 LLT SrcTy = MRI.getType(Operands[0]);
2143 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2144 bool CanReuseInputDirectly = DstTy == SrcTy;
2145 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2146 Register DstReg = MI.getOperand(Idx).getReg();
2147 Register SrcReg = Operands[Idx];
2148
2149 // This combine may run after RegBankSelect, so we need to be aware of
2150 // register banks.
2151 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2152 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2153 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2154 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2155 }
2156
2157 if (CanReuseInputDirectly)
2158 replaceRegWith(MRI, DstReg, SrcReg);
2159 else
2160 Builder.buildCast(DstReg, SrcReg);
2161 }
2162 MI.eraseFromParent();
2163}
2164
2165bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2166 SmallVectorImpl<APInt> &Csts) {
2167 unsigned SrcIdx = MI.getNumOperands() - 1;
2168 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2169 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2170 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2171 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2172 return false;
2173 // Break down the big constant into smaller ones.
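// E.g. (illustrative sketch, assuming an s64 constant unmerged into two s32
// pieces, lowest bits first; register names are made up):
//   %c:_(s64) = G_CONSTANT i64 0x0000000100000002
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %c
// -->
//   %lo:_(s32) = G_CONSTANT i32 2
//   %hi:_(s32) = G_CONSTANT i32 1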
2174 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2175 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2176 ? CstVal.getCImm()->getValue()
2177 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2178
2179 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2180 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2181 // Unmerge a constant.
2182 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2183 Csts.emplace_back(Val.trunc(ShiftAmt));
2184 Val = Val.lshr(ShiftAmt);
2185 }
2186
2187 return true;
2188}
2189
2190void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2191 SmallVectorImpl<APInt> &Csts) {
2192 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2193 "Expected an unmerge");
2194 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2195 "Not enough operands to replace all defs");
2196 unsigned NumElems = MI.getNumOperands() - 1;
2197 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2198 Register DstReg = MI.getOperand(Idx).getReg();
2199 Builder.buildConstant(DstReg, Csts[Idx]);
2200 }
2201
2202 MI.eraseFromParent();
2203}
2204
2205bool CombinerHelper::matchCombineUnmergeUndef(
2206 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2207 unsigned SrcIdx = MI.getNumOperands() - 1;
2208 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2209 MatchInfo = [&MI](MachineIRBuilder &B) {
2210 unsigned NumElems = MI.getNumOperands() - 1;
2211 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2212 Register DstReg = MI.getOperand(Idx).getReg();
2213 B.buildUndef(DstReg);
2214 }
2215 };
2216 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2217}
2218
2219bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2220 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2221 "Expected an unmerge");
2222 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2223 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2224 return false;
2225 // Check that all the lanes are dead except the first one.
2226 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2227 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2228 return false;
2229 }
2230 return true;
2231}
2232
2233void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2234 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2235 Register Dst0Reg = MI.getOperand(0).getReg();
2236 Builder.buildTrunc(Dst0Reg, SrcReg);
2237 MI.eraseFromParent();
2238}
2239
2240bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2241 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2242 "Expected an unmerge");
2243 Register Dst0Reg = MI.getOperand(0).getReg();
2244 LLT Dst0Ty = MRI.getType(Dst0Reg);
2245 // G_ZEXT on vector applies to each lane, so it will
2246 // affect all destinations. Therefore we won't be able
2247 // to simplify the unmerge to just the first definition.
2248 if (Dst0Ty.isVector())
2249 return false;
2250 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2251 LLT SrcTy = MRI.getType(SrcReg);
2252 if (SrcTy.isVector())
2253 return false;
2254
2255 Register ZExtSrcReg;
2256 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2257 return false;
2258
2259 // Finally we can replace the first definition with
2260 // a zext of the source if the definition is big enough to hold
2261 // all of ZExtSrc bits.
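// E.g. (illustrative sketch, assuming the types shown; made-up registers):
//   %z:_(s64) = G_ZEXT %x:_(s8)
//   %d0:_(s32), %d1:_(s32) = G_UNMERGE_VALUES %z
// -->
//   %d0:_(s32) = G_ZEXT %x:_(s8)
//   %d1:_(s32) = G_CONSTANT i32 0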
2262 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2263 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2264}
2265
2266void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2267 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2268 "Expected an unmerge");
2269
2270 Register Dst0Reg = MI.getOperand(0).getReg();
2271
2272 MachineInstr *ZExtInstr =
2273 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2274 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2275 "Expecting a G_ZEXT");
2276
2277 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2278 LLT Dst0Ty = MRI.getType(Dst0Reg);
2279 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2280
2281 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2282 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2283 } else {
2284 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2285 "ZExt src doesn't fit in destination");
2286 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2287 }
2288
2289 Register ZeroReg;
2290 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2291 if (!ZeroReg)
2292 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2293 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2294 }
2295 MI.eraseFromParent();
2296}
2297
2298bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2299 unsigned TargetShiftSize,
2300 unsigned &ShiftVal) {
2301 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2302 MI.getOpcode() == TargetOpcode::G_LSHR ||
2303 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2304
2305 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2306 if (Ty.isVector()) // TODO:
2307 return false;
2308
2309 // Don't narrow further than the requested size.
2310 unsigned Size = Ty.getSizeInBits();
2311 if (Size <= TargetShiftSize)
2312 return false;
2313
2314 auto MaybeImmVal =
2315 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2316 if (!MaybeImmVal)
2317 return false;
2318
2319 ShiftVal = MaybeImmVal->Value.getSExtValue();
2320 return ShiftVal >= Size / 2 && ShiftVal < Size;
2321}
2322
2323void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2324 const unsigned &ShiftVal) {
2325 Register DstReg = MI.getOperand(0).getReg();
2326 Register SrcReg = MI.getOperand(1).getReg();
2327 LLT Ty = MRI.getType(SrcReg);
2328 unsigned Size = Ty.getSizeInBits();
2329 unsigned HalfSize = Size / 2;
2330 assert(ShiftVal >= HalfSize);
2331
2332 LLT HalfTy = LLT::scalar(HalfSize);
2333
2334 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2335 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2336
2337 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2338 Register Narrowed = Unmerge.getReg(1);
2339
2340 // dst = G_LSHR s64:x, C for C >= 32
2341 // =>
2342 // lo, hi = G_UNMERGE_VALUES x
2343 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2344
2345 if (NarrowShiftAmt != 0) {
2346 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2347 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2348 }
2349
2350 auto Zero = Builder.buildConstant(HalfTy, 0);
2351 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2352 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2353 Register Narrowed = Unmerge.getReg(0);
2354 // dst = G_SHL s64:x, C for C >= 32
2355 // =>
2356 // lo, hi = G_UNMERGE_VALUES x
2357 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2358 if (NarrowShiftAmt != 0) {
2359 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2360 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2361 }
2362
2363 auto Zero = Builder.buildConstant(HalfTy, 0);
2364 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2365 } else {
2366 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2367 auto Hi = Builder.buildAShr(
2368 HalfTy, Unmerge.getReg(1),
2369 Builder.buildConstant(HalfTy, HalfSize - 1));
2370
2371 if (ShiftVal == HalfSize) {
2372 // (G_ASHR i64:x, 32) ->
2373 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2374 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2375 } else if (ShiftVal == Size - 1) {
2376 // Don't need a second shift.
2377 // (G_ASHR i64:x, 63) ->
2378 // %narrowed = (G_ASHR hi_32(x), 31)
2379 // G_MERGE_VALUES %narrowed, %narrowed
2380 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2381 } else {
2382 auto Lo = Builder.buildAShr(
2383 HalfTy, Unmerge.getReg(1),
2384 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2385
2386 // (G_ASHR i64:x, C) ->, for C >= 32
2387 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2388 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2389 }
2390 }
2391
2392 MI.eraseFromParent();
2393}
2394
2395bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2396 unsigned TargetShiftAmount) {
2397 unsigned ShiftAmt;
2398 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2399 applyCombineShiftToUnmerge(MI, ShiftAmt);
2400 return true;
2401 }
2402
2403 return false;
2404}
2405
2406bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2407 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2408 Register DstReg = MI.getOperand(0).getReg();
2409 LLT DstTy = MRI.getType(DstReg);
2410 Register SrcReg = MI.getOperand(1).getReg();
2411 return mi_match(SrcReg, MRI,
2412 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2413}
2414
2415void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2416 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2417 Register DstReg = MI.getOperand(0).getReg();
2418 Builder.buildCopy(DstReg, Reg);
2419 MI.eraseFromParent();
2420}
2421
2422void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2423 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2424 Register DstReg = MI.getOperand(0).getReg();
2425 Builder.buildZExtOrTrunc(DstReg, Reg);
2426 MI.eraseFromParent();
2427}
2428
2429bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2430 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2431 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2432 Register LHS = MI.getOperand(1).getReg();
2433 Register RHS = MI.getOperand(2).getReg();
2434 LLT IntTy = MRI.getType(LHS);
2435
2436 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2437 // instruction.
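// E.g. (illustrative sketch, assuming 64-bit p0 pointers; made-up registers):
//   %i:_(s64) = G_PTRTOINT %p:_(p0)
//   %d:_(s64) = G_ADD %x, %i
// -->
//   %q:_(p0) = G_PTR_ADD %p, %x
//   %d:_(s64) = G_PTRTOINT %q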
2438 PtrReg.second = false;
2439 for (Register SrcReg : {LHS, RHS}) {
2440 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2441 // Don't handle cases where the integer is implicitly converted to the
2442 // pointer width.
2443 LLT PtrTy = MRI.getType(PtrReg.first);
2444 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2445 return true;
2446 }
2447
2448 PtrReg.second = true;
2449 }
2450
2451 return false;
2452}
2453
2454void CombinerHelper::applyCombineAddP2IToPtrAdd(
2455 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2456 Register Dst = MI.getOperand(0).getReg();
2457 Register LHS = MI.getOperand(1).getReg();
2458 Register RHS = MI.getOperand(2).getReg();
2459
2460 const bool DoCommute = PtrReg.second;
2461 if (DoCommute)
2462 std::swap(LHS, RHS);
2463 LHS = PtrReg.first;
2464
2465 LLT PtrTy = MRI.getType(LHS);
2466
2467 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2468 Builder.buildPtrToInt(Dst, PtrAdd);
2469 MI.eraseFromParent();
2470}
2471
2472bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2473 APInt &NewCst) {
2474 auto &PtrAdd = cast<GPtrAdd>(MI);
2475 Register LHS = PtrAdd.getBaseReg();
2476 Register RHS = PtrAdd.getOffsetReg();
2477 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2478
2479 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2480 APInt Cst;
2481 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2482 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2483 // G_INTTOPTR uses zero-extension
2484 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2485 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2486 return true;
2487 }
2488 }
2489
2490 return false;
2491}
2492
2493void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2494 APInt &NewCst) {
2495 auto &PtrAdd = cast<GPtrAdd>(MI);
2496 Register Dst = PtrAdd.getReg(0);
2497
2498 Builder.buildConstant(Dst, NewCst);
2499 PtrAdd.eraseFromParent();
2500}
2501
2502bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2503 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2504 Register DstReg = MI.getOperand(0).getReg();
2505 Register SrcReg = MI.getOperand(1).getReg();
2506 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2507 if (OriginalSrcReg.isValid())
2508 SrcReg = OriginalSrcReg;
2509 LLT DstTy = MRI.getType(DstReg);
2510 return mi_match(SrcReg, MRI,
2511 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2512}
2513
2514bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2515 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2516 Register DstReg = MI.getOperand(0).getReg();
2517 Register SrcReg = MI.getOperand(1).getReg();
2518 LLT DstTy = MRI.getType(DstReg);
2519 if (mi_match(SrcReg, MRI,
2520 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2521 unsigned DstSize = DstTy.getScalarSizeInBits();
2522 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2523 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2524 }
2525 return false;
2526}
2527
2528bool CombinerHelper::matchCombineExtOfExt(
2529 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2530 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2531 MI.getOpcode() == TargetOpcode::G_SEXT ||
2532 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2533 "Expected a G_[ASZ]EXT");
2534 Register SrcReg = MI.getOperand(1).getReg();
2535 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2536 if (OriginalSrcReg.isValid())
2537 SrcReg = OriginalSrcReg;
2538 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2539 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
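// E.g. (illustrative sketch, assuming the types shown; made-up registers):
//   %e:_(s32) = G_ZEXT %x:_(s8)
//   %d:_(s64) = G_SEXT %e
// -->
//   %d:_(s64) = G_ZEXT %x:_(s8)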
2540 unsigned Opc = MI.getOpcode();
2541 unsigned SrcOpc = SrcMI->getOpcode();
2542 if (Opc == SrcOpc ||
2543 (Opc == TargetOpcode::G_ANYEXT &&
2544 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2545 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2546 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2547 return true;
2548 }
2549 return false;
2550}
2551
2552void CombinerHelper::applyCombineExtOfExt(
2553 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2554 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2555 MI.getOpcode() == TargetOpcode::G_SEXT ||
2556 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2557 "Expected a G_[ASZ]EXT");
2558
2559 Register Reg = std::get<0>(MatchInfo);
2560 unsigned SrcExtOp = std::get<1>(MatchInfo);
2561
2562 // Combine exts with the same opcode.
2563 if (MI.getOpcode() == SrcExtOp) {
2564 Observer.changingInstr(MI);
2565 MI.getOperand(1).setReg(Reg);
2566 Observer.changedInstr(MI);
2567 return;
2568 }
2569
2570 // Combine:
2571 // - anyext([sz]ext x) to [sz]ext x
2572 // - sext(zext x) to zext x
2573 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2574 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2575 SrcExtOp == TargetOpcode::G_ZEXT)) {
2576 Register DstReg = MI.getOperand(0).getReg();
2577 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2578 MI.eraseFromParent();
2579 }
2580}
2581
2582bool CombinerHelper::matchCombineTruncOfExt(
2583 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2584 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2585 Register SrcReg = MI.getOperand(1).getReg();
2586 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2587 unsigned SrcOpc = SrcMI->getOpcode();
2588 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2589 SrcOpc == TargetOpcode::G_ZEXT) {
2590 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2591 return true;
2592 }
2593 return false;
2594}
2595
2596void CombinerHelper::applyCombineTruncOfExt(
2597 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2598 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2599 Register SrcReg = MatchInfo.first;
2600 unsigned SrcExtOp = MatchInfo.second;
2601 Register DstReg = MI.getOperand(0).getReg();
2602 LLT SrcTy = MRI.getType(SrcReg);
2603 LLT DstTy = MRI.getType(DstReg);
2604 if (SrcTy == DstTy) {
2605 MI.eraseFromParent();
2606 replaceRegWith(MRI, DstReg, SrcReg);
2607 return;
2608 }
2609 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2610 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2611 else
2612 Builder.buildTrunc(DstReg, SrcReg);
2613 MI.eraseFromParent();
2614}
2615
2616static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2617 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2618 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2619
2620 // ShiftTy > 32 > TruncTy -> 32
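// E.g. (illustrative, assuming these sizes): an s64 right shift whose result
// is truncated to s16 is instead performed at s32, provided the shift-amount
// check below succeeds.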
2621 if (ShiftSize > 32 && TruncSize < 32)
2622 return ShiftTy.changeElementSize(32);
2623
2624 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2625 // Some targets like it, some don't, some only like it under certain
2626 // conditions/processor versions, etc.
2627 // A TL hook might be needed for this.
2628
2629 // Don't combine
2630 return ShiftTy;
2631}
2632
2633bool CombinerHelper::matchCombineTruncOfShift(
2634 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2635 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2636 Register DstReg = MI.getOperand(0).getReg();
2637 Register SrcReg = MI.getOperand(1).getReg();
2638
2639 if (!MRI.hasOneNonDBGUse(SrcReg))
2640 return false;
2641
2642 LLT SrcTy = MRI.getType(SrcReg);
2643 LLT DstTy = MRI.getType(DstReg);
2644
2645 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2646 const auto &TL = getTargetLowering();
2647
2648 LLT NewShiftTy;
2649 switch (SrcMI->getOpcode()) {
2650 default:
2651 return false;
2652 case TargetOpcode::G_SHL: {
2653 NewShiftTy = DstTy;
2654
2655 // Make sure new shift amount is legal.
2656 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2657 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2658 return false;
2659 break;
2660 }
2661 case TargetOpcode::G_LSHR:
2662 case TargetOpcode::G_ASHR: {
2663 // For right shifts, we conservatively do not do the transform if the TRUNC
2664 // has any STORE users. The reason is that if we change the type of the
2665 // shift, we may break the truncstore combine.
2666 //
2667 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2668 for (auto &User : MRI.use_instructions(DstReg))
2669 if (User.getOpcode() == TargetOpcode::G_STORE)
2670 return false;
2671
2672 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2673 if (NewShiftTy == SrcTy)
2674 return false;
2675
2676 // Make sure we won't lose information by truncating the high bits.
2677 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2678 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2679 DstTy.getScalarSizeInBits()))
2680 return false;
2681 break;
2682 }
2683 }
2684
2685 if (!isLegalOrBeforeLegalizer(
2686 {SrcMI->getOpcode(),
2687 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2688 return false;
2689
2690 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2691 return true;
2692}
2693
2694void CombinerHelper::applyCombineTruncOfShift(
2695 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2696 MachineInstr *ShiftMI = MatchInfo.first;
2697 LLT NewShiftTy = MatchInfo.second;
2698
2699 Register Dst = MI.getOperand(0).getReg();
2700 LLT DstTy = MRI.getType(Dst);
2701
2702 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2703 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2704 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2705
2706 Register NewShift =
2707 Builder
2708 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2709 .getReg(0);
2710
2711 if (NewShiftTy == DstTy)
2712 replaceRegWith(MRI, Dst, NewShift);
2713 else
2714 Builder.buildTrunc(Dst, NewShift);
2715
2716 eraseInst(MI);
2717}
2718
2719bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2720 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2721 return MO.isReg() &&
2722 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2723 });
2724}
2725
2726bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2727 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2728 return !MO.isReg() ||
2729 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2730 });
2731}
2732
2733bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2734 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2735 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2736 return all_of(Mask, [](int Elt) { return Elt < 0; });
2737}
2738
2739bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2740 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2741 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2742 MRI);
2743}
2744
2745bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2746 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2747 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2748 MRI);
2749}
2750
2751bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) {
2752 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2753 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2754 "Expected an insert/extract element op");
2755 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2756 unsigned IdxIdx =
2757 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2758 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2759 if (!Idx)
2760 return false;
2761 return Idx->getZExtValue() >= VecTy.getNumElements();
2762}
2763
2764bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2765 GSelect &SelMI = cast<GSelect>(MI);
2766 auto Cst =
2767 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2768 if (!Cst)
2769 return false;
2770 OpIdx = Cst->isZero() ? 3 : 2;
2771 return true;
2772}
2773
2774void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2775
2776bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2777 const MachineOperand &MOP2) {
2778 if (!MOP1.isReg() || !MOP2.isReg())
2779 return false;
2780 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2781 if (!InstAndDef1)
2782 return false;
2783 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2784 if (!InstAndDef2)
2785 return false;
2786 MachineInstr *I1 = InstAndDef1->MI;
2787 MachineInstr *I2 = InstAndDef2->MI;
2788
2789 // Handle a case like this:
2790 //
2791 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2792 //
2793 // Even though %0 and %1 are produced by the same instruction they are not
2794 // the same values.
2795 if (I1 == I2)
2796 return MOP1.getReg() == MOP2.getReg();
2797
2798 // If we have an instruction which loads or stores, we can't guarantee that
2799 // it is identical.
2800 //
2801 // For example, we may have
2802 //
2803 // %x1 = G_LOAD %addr (load N from @somewhere)
2804 // ...
2805 // call @foo
2806 // ...
2807 // %x2 = G_LOAD %addr (load N from @somewhere)
2808 // ...
2809 // %or = G_OR %x1, %x2
2810 //
2811 // It's possible that @foo will modify whatever lives at the address we're
2812 // loading from. To be safe, let's just assume that all loads and stores
2813 // are different (unless we have something which is guaranteed to not
2814 // change.)
2815 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2816 return false;
2817
2818 // If both instructions are loads or stores, they are equal only if both
2819 // are dereferenceable invariant loads with the same number of bits.
2820 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2821 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2822 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2823 if (!LS1 || !LS2)
2824 return false;
2825
2826 if (!I2->isDereferenceableInvariantLoad() ||
2827 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2828 return false;
2829 }
2830
2831 // Check for physical registers on the instructions first to avoid cases
2832 // like this:
2833 //
2834 // %a = COPY $physreg
2835 // ...
2836 // SOMETHING implicit-def $physreg
2837 // ...
2838 // %b = COPY $physreg
2839 //
2840 // These copies are not equivalent.
2841 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2842 return MO.isReg() && MO.getReg().isPhysical();
2843 })) {
2844 // Check if we have a case like this:
2845 //
2846 // %a = COPY $physreg
2847 // %b = COPY %a
2848 //
2849 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2850 // From that, we know that they must have the same value, since they must
2851 // have come from the same COPY.
2852 return I1->isIdenticalTo(*I2);
2853 }
2854
2855 // We don't have any physical registers, so we don't necessarily need the
2856 // same vreg defs.
2857 //
2858 // On the off-chance that there's some target instruction feeding into the
2859 // instruction, let's use produceSameValue instead of isIdenticalTo.
2860 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2861 // Handle instructions with multiple defs that produce same values. Values
2862 // are same for operands with same index.
2863 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2864 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2865 // I1 and I2 are different instructions but produce same values,
2866 // %1 and %6 are same, %1 and %7 are not the same value.
2867 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2868 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2869 }
2870 return false;
2871}
2872
2873bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2874 if (!MOP.isReg())
2875 return false;
2876 auto *MI = MRI.getVRegDef(MOP.getReg());
2877 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2878 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2879 MaybeCst->getSExtValue() == C;
2880}
2881
2882bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) {
2883 if (!MOP.isReg())
2884 return false;
2885 std::optional<FPValueAndVReg> MaybeCst;
2886 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2887 return false;
2888
2889 return MaybeCst->Value.isExactlyValue(C);
2890}
2891
2892void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2893 unsigned OpIdx) {
2894 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2895 Register OldReg = MI.getOperand(0).getReg();
2896 Register Replacement = MI.getOperand(OpIdx).getReg();
2897 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2898 MI.eraseFromParent();
2899 replaceRegWith(MRI, OldReg, Replacement);
2900}
2901
2902void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2903 Register Replacement) {
2904 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2905 Register OldReg = MI.getOperand(0).getReg();
2906 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2907 MI.eraseFromParent();
2908 replaceRegWith(MRI, OldReg, Replacement);
2909}
2910
2911bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
2912 unsigned ConstIdx) {
2913 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2914 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2915
2916 // Get the shift amount
2917 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2918 if (!VRegAndVal)
2919 return false;
2920
2921 // Return true if the shift amount is >= the bitwidth.
2922 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2923}
2924
2925void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
2926 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2927 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2928 "This is not a funnel shift operation");
2929
2930 Register ConstReg = MI.getOperand(3).getReg();
2931 LLT ConstTy = MRI.getType(ConstReg);
2932 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2933
2934 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2935 assert((VRegAndVal) && "Value is not a constant");
2936
2937 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
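// E.g. (illustrative, assuming s32 operands): a G_FSHL/G_FSHR shift amount of
// 35 becomes 35 % 32 == 3.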
2938 APInt NewConst = VRegAndVal->Value.urem(
2939 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2940
2941 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2942 Builder.buildInstr(
2943 MI.getOpcode(), {MI.getOperand(0)},
2944 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2945
2946 MI.eraseFromParent();
2947}
2948
2949bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2950 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2951 // Match (cond ? x : x)
2952 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2953 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2954 MRI);
2955}
2956
2957bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2958 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2959 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2960 MRI);
2961}
2962
2963bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2964 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2965 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2966 MRI);
2967}
2968
2969bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2970 MachineOperand &MO = MI.getOperand(OpIdx);
2971 return MO.isReg() &&
2972 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2973}
2974
2975bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2976 unsigned OpIdx) {
2977 MachineOperand &MO = MI.getOperand(OpIdx);
2978 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2979}
2980
2981void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2982 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2983 Builder.buildFConstant(MI.getOperand(0), C);
2984 MI.eraseFromParent();
2985}
2986
2987void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2988 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2989 Builder.buildConstant(MI.getOperand(0), C);
2990 MI.eraseFromParent();
2991}
2992
2993void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
2994 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2995 Builder.buildConstant(MI.getOperand(0), C);
2996 MI.eraseFromParent();
2997}
2998
2999void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
3000 ConstantFP *CFP) {
3001 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3002 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3003 MI.eraseFromParent();
3004}
3005
3006void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
3007 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3008 Builder.buildUndef(MI.getOperand(0));
3009 MI.eraseFromParent();
3010}
3011
3012bool CombinerHelper::matchSimplifyAddToSub(
3013 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
3014 Register LHS = MI.getOperand(1).getReg();
3015 Register RHS = MI.getOperand(2).getReg();
3016 Register &NewLHS = std::get<0>(MatchInfo);
3017 Register &NewRHS = std::get<1>(MatchInfo);
3018
3019 // Helper lambda to check for opportunities for
3020 // ((0-A) + B) -> B - A
3021 // (A + (0-B)) -> A - B
3022 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3023 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3024 return false;
3025 NewLHS = MaybeNewLHS;
3026 return true;
3027 };
3028
3029 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3030}
3031
3032bool CombinerHelper::matchCombineInsertVecElts(
3033 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
3034 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3035 "Invalid opcode");
3036 Register DstReg = MI.getOperand(0).getReg();
3037 LLT DstTy = MRI.getType(DstReg);
3038 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3039 unsigned NumElts = DstTy.getNumElements();
3040 // If this MI is part of a sequence of insert_vec_elts, then
3041 // don't do the combine in the middle of the sequence.
3042 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3043 TargetOpcode::G_INSERT_VECTOR_ELT)
3044 return false;
3045 MachineInstr *CurrInst = &MI;
3046 MachineInstr *TmpInst;
3047 int64_t IntImm;
3048 Register TmpReg;
3049 MatchInfo.resize(NumElts);
3050 while (mi_match(
3051 CurrInst->getOperand(0).getReg(), MRI,
3052 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3053 if (IntImm >= NumElts || IntImm < 0)
3054 return false;
3055 if (!MatchInfo[IntImm])
3056 MatchInfo[IntImm] = TmpReg;
3057 CurrInst = TmpInst;
3058 }
3059 // Variable index.
3060 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3061 return false;
3062 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3063 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3064 if (!MatchInfo[I - 1].isValid())
3065 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3066 }
3067 return true;
3068 }
3069 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3070 // overwritten, bail out.
3071 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3072 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3073}
3074
3075void CombinerHelper::applyCombineInsertVecElts(
3076 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
3077 Register UndefReg;
3078 auto GetUndef = [&]() {
3079 if (UndefReg)
3080 return UndefReg;
3081 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3082 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3083 return UndefReg;
3084 };
3085 for (Register &Reg : MatchInfo) {
3086 if (!Reg)
3087 Reg = GetUndef();
3088 }
3089 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3090 MI.eraseFromParent();
3091}
3092
3093void CombinerHelper::applySimplifyAddToSub(
3094 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
3095 Register SubLHS, SubRHS;
3096 std::tie(SubLHS, SubRHS) = MatchInfo;
3097 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3098 MI.eraseFromParent();
3099}
3100
3101bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
3102 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3103 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3104 //
3105 // Creates the new hand + logic instruction (but does not insert them.)
3106 //
3107 // On success, MatchInfo is populated with the new instructions. These are
3108 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3109 unsigned LogicOpcode = MI.getOpcode();
3110 assert(LogicOpcode == TargetOpcode::G_AND ||
3111 LogicOpcode == TargetOpcode::G_OR ||
3112 LogicOpcode == TargetOpcode::G_XOR);
3113 MachineIRBuilder MIB(MI);
3114 Register Dst = MI.getOperand(0).getReg();
3115 Register LHSReg = MI.getOperand(1).getReg();
3116 Register RHSReg = MI.getOperand(2).getReg();
3117
3118 // Don't recompute anything.
3119 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3120 return false;
3121
3122 // Make sure we have (hand x, ...), (hand y, ...)
3123 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3124 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3125 if (!LeftHandInst || !RightHandInst)
3126 return false;
3127 unsigned HandOpcode = LeftHandInst->getOpcode();
3128 if (HandOpcode != RightHandInst->getOpcode())
3129 return false;
3130 if (!LeftHandInst->getOperand(1).isReg() ||
3131 !RightHandInst->getOperand(1).isReg())
3132 return false;
3133
3134 // Make sure the types match up, and if we're doing this post-legalization,
3135 // we end up with legal types.
3136 Register X = LeftHandInst->getOperand(1).getReg();
3137 Register Y = RightHandInst->getOperand(1).getReg();
3138 LLT XTy = MRI.getType(X);
3139 LLT YTy = MRI.getType(Y);
3140 if (!XTy.isValid() || XTy != YTy)
3141 return false;
3142
3143 // Optional extra source register.
3144 Register ExtraHandOpSrcReg;
3145 switch (HandOpcode) {
3146 default:
3147 return false;
3148 case TargetOpcode::G_ANYEXT:
3149 case TargetOpcode::G_SEXT:
3150 case TargetOpcode::G_ZEXT: {
3151 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3152 break;
3153 }
3154 case TargetOpcode::G_TRUNC: {
3155 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3156 const MachineFunction *MF = MI.getMF();
3157 const DataLayout &DL = MF->getDataLayout();
3158 LLVMContext &Ctx = MF->getFunction().getContext();
3159
3160 LLT DstTy = MRI.getType(Dst);
3161 const TargetLowering &TLI = getTargetLowering();
3162
3163 // Be extra careful sinking truncate. If it's free, there's no benefit in
3164 // widening a binop.
3165 if (TLI.isZExtFree(DstTy, XTy, DL, Ctx) &&
3166 TLI.isTruncateFree(XTy, DstTy, DL, Ctx))
3167 return false;
3168 break;
3169 }
3170 case TargetOpcode::G_AND:
3171 case TargetOpcode::G_ASHR:
3172 case TargetOpcode::G_LSHR:
3173 case TargetOpcode::G_SHL: {
3174 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3175 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3176 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3177 return false;
3178 ExtraHandOpSrcReg = ZOp.getReg();
3179 break;
3180 }
3181 }
3182
3183 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3184 return false;
3185
3186 // Record the steps to build the new instructions.
3187 //
3188 // Steps to build (logic x, y)
3189 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3190 OperandBuildSteps LogicBuildSteps = {
3191 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3192 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3193 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3194 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3195
3196 // Steps to build hand (logic x, y), ...z
3197 OperandBuildSteps HandBuildSteps = {
3198 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3199 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3200 if (ExtraHandOpSrcReg.isValid())
3201 HandBuildSteps.push_back(
3202 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3203 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3204
3205 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3206 return true;
3207}
3208
3209void CombinerHelper::applyBuildInstructionSteps(
3210 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3211 assert(MatchInfo.InstrsToBuild.size() &&
3212 "Expected at least one instr to build?");
3213 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3214 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3215 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3216 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3217 for (auto &OperandFn : InstrToBuild.OperandFns)
3218 OperandFn(Instr);
3219 }
3220 MI.eraseFromParent();
3221}
3222
3223bool CombinerHelper::matchAshrShlToSextInreg(
3224 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3225 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
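// (ashr (shl x, C), C) --> (sext_inreg x, Width - C). Illustrative sketch,
// assuming an s32 type and made-up registers:
//   %s:_(s32) = G_SHL %x, 24
//   %d:_(s32) = G_ASHR %s, 24
// -->
//   %d:_(s32) = G_SEXT_INREG %x, 8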
3226 int64_t ShlCst, AshrCst;
3227 Register Src;
3228 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3229 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3230 m_ICstOrSplat(AshrCst))))
3231 return false;
3232 if (ShlCst != AshrCst)
3233 return false;
3234 if (!isLegalOrBeforeLegalizer(
3235 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3236 return false;
3237 MatchInfo = std::make_tuple(Src, ShlCst);
3238 return true;
3239}
3240
3241void CombinerHelper::applyAshShlToSextInreg(
3242 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3243 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3244 Register Src;
3245 int64_t ShiftAmt;
3246 std::tie(Src, ShiftAmt) = MatchInfo;
3247 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3248 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3249 MI.eraseFromParent();
3250}
3251
3252/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
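/// E.g. (illustrative, assuming an s32 type): and(and(x, 0xFF00), 0x0FF0)
/// becomes and(x, 0x0F00), while and(and(x, 0xFF00), 0x00FF) folds to 0.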
3253bool CombinerHelper::matchOverlappingAnd(
3254 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3255 assert(MI.getOpcode() == TargetOpcode::G_AND);
3256
3257 Register Dst = MI.getOperand(0).getReg();
3258 LLT Ty = MRI.getType(Dst);
3259
3260 Register R;
3261 int64_t C1;
3262 int64_t C2;
3263 if (!mi_match(
3264 Dst, MRI,
3265 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3266 return false;
3267
3268 MatchInfo = [=](MachineIRBuilder &B) {
3269 if (C1 & C2) {
3270 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3271 return;
3272 }
3273 auto Zero = B.buildConstant(Ty, 0);
3274 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3275 };
3276 return true;
3277}
3278
3279bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3280 Register &Replacement) {
3281 // Given
3282 //
3283 // %y:_(sN) = G_SOMETHING
3284 // %x:_(sN) = G_SOMETHING
3285 // %res:_(sN) = G_AND %x, %y
3286 //
3287 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3288 //
3289 // Patterns like this can appear as a result of legalization. E.g.
3290 //
3291 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3292 // %one:_(s32) = G_CONSTANT i32 1
3293 // %and:_(s32) = G_AND %cmp, %one
3294 //
3295 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3296 assert(MI.getOpcode() == TargetOpcode::G_AND);
3297 if (!KB)
3298 return false;
3299
3300 Register AndDst = MI.getOperand(0).getReg();
3301 Register LHS = MI.getOperand(1).getReg();
3302 Register RHS = MI.getOperand(2).getReg();
3303
3304 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3305 // we can't do anything. If we do, then it depends on whether we have
3306 // KnownBits on the LHS.
3307 KnownBits RHSBits = KB->getKnownBits(RHS);
3308 if (RHSBits.isUnknown())
3309 return false;
3310
3311 KnownBits LHSBits = KB->getKnownBits(LHS);
3312
3313 // Check that x & Mask == x.
3314 // x & 1 == x, always
3315 // x & 0 == x, only if x is also 0
3316 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3317 //
3318 // Check if we can replace AndDst with the LHS of the G_AND
3319 if (canReplaceReg(AndDst, LHS, MRI) &&
3320 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3321 Replacement = LHS;
3322 return true;
3323 }
3324
3325 // Check if we can replace AndDst with the RHS of the G_AND
3326 if (canReplaceReg(AndDst, RHS, MRI) &&
3327 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3328 Replacement = RHS;
3329 return true;
3330 }
3331
3332 return false;
3333}
3334
3335bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
3336 // Given
3337 //
3338 // %y:_(sN) = G_SOMETHING
3339 // %x:_(sN) = G_SOMETHING
3340 // %res:_(sN) = G_OR %x, %y
3341 //
3342 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3343 assert(MI.getOpcode() == TargetOpcode::G_OR);
3344 if (!KB)
3345 return false;
3346
3347 Register OrDst = MI.getOperand(0).getReg();
3348 Register LHS = MI.getOperand(1).getReg();
3349 Register RHS = MI.getOperand(2).getReg();
3350
3351 KnownBits LHSBits = KB->getKnownBits(LHS);
3352 KnownBits RHSBits = KB->getKnownBits(RHS);
3353
3354 // Check that x | Mask == x.
3355 // x | 0 == x, always
3356 // x | 1 == x, only if x is also 1
3357 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3358 //
3359 // Check if we can replace OrDst with the LHS of the G_OR
3360 if (canReplaceReg(OrDst, LHS, MRI) &&
3361 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3362 Replacement = LHS;
3363 return true;
3364 }
3365
3366 // Check if we can replace OrDst with the RHS of the G_OR
3367 if (canReplaceReg(OrDst, RHS, MRI) &&
3368 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3369 Replacement = RHS;
3370 return true;
3371 }
3372
3373 return false;
3374}
3375
3376bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3377 // If the input is already sign extended, just drop the extension.
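// E.g. (illustrative numbers): G_SEXT_INREG %x, 8 on an s32 value is redundant
// when KnownBits reports at least 25 sign bits for %x (32 - 8 + 1 == 25).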
3378 Register Src = MI.getOperand(1).getReg();
3379 unsigned ExtBits = MI.getOperand(2).getImm();
3380 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3381 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3382}
3383
3384static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3385 int64_t Cst, bool IsVector, bool IsFP) {
3386 // For i1, Cst will always be -1 regardless of boolean contents.
3387 return (ScalarSizeBits == 1 && Cst == -1) ||
3388 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3389}
3390
3391bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3392 SmallVectorImpl<Register> &RegsToNegate) {
3393 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3394 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3395 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3396 Register XorSrc;
3397 Register CstReg;
3398 // We match xor(src, true) here.
3399 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3400 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3401 return false;
3402
3403 if (!MRI.hasOneNonDBGUse(XorSrc))
3404 return false;
3405
3406 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3407 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3408 // list of tree nodes to visit.
3409 RegsToNegate.push_back(XorSrc);
3410 // Remember whether the comparisons are all integer or all floating point.
3411 bool IsInt = false;
3412 bool IsFP = false;
3413 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3414 Register Reg = RegsToNegate[I];
3415 if (!MRI.hasOneNonDBGUse(Reg))
3416 return false;
3417 MachineInstr *Def = MRI.getVRegDef(Reg);
3418 switch (Def->getOpcode()) {
3419 default:
3420 // Don't match if the tree contains anything other than ANDs, ORs and
3421 // comparisons.
3422 return false;
3423 case TargetOpcode::G_ICMP:
3424 if (IsFP)
3425 return false;
3426 IsInt = true;
3427 // When we apply the combine we will invert the predicate.
3428 break;
3429 case TargetOpcode::G_FCMP:
3430 if (IsInt)
3431 return false;
3432 IsFP = true;
3433 // When we apply the combine we will invert the predicate.
3434 break;
3435 case TargetOpcode::G_AND:
3436 case TargetOpcode::G_OR:
3437 // Implement De Morgan's laws:
3438 // ~(x & y) -> ~x | ~y
3439 // ~(x | y) -> ~x & ~y
3440 // When we apply the combine we will change the opcode and recursively
3441 // negate the operands.
3442 RegsToNegate.push_back(Def->getOperand(1).getReg());
3443 RegsToNegate.push_back(Def->getOperand(2).getReg());
3444 break;
3445 }
3446 }
3447
3448 // Now we know whether the comparisons are integer or floating point; check
3449 // the constant in the xor.
3450 int64_t Cst;
3451 if (Ty.isVector()) {
3452 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3453 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3454 if (!MaybeCst)
3455 return false;
3456 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3457 return false;
3458 } else {
3459 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3460 return false;
3461 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3462 return false;
3463 }
3464
3465 return true;
3466}
3467
3469 SmallVectorImpl<Register> &RegsToNegate) {
3470 for (Register Reg : RegsToNegate) {
3471 MachineInstr *Def = MRI.getVRegDef(Reg);
3472 Observer.changingInstr(*Def);
3473 // For each comparison, invert the opcode. For each AND and OR, change the
3474 // opcode.
3475 switch (Def->getOpcode()) {
3476 default:
3477 llvm_unreachable("Unexpected opcode");
3478 case TargetOpcode::G_ICMP:
3479 case TargetOpcode::G_FCMP: {
3480 MachineOperand &PredOp = Def->getOperand(1);
3483 PredOp.setPredicate(NewP);
3484 break;
3485 }
3486 case TargetOpcode::G_AND:
3487 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3488 break;
3489 case TargetOpcode::G_OR:
3490 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3491 break;
3492 }
3493 Observer.changedInstr(*Def);
3494 }
3495
3496 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3497 MI.eraseFromParent();
3498}
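// Illustrative aside (not part of this file): the scalar logic behind the
// rewrite above, as a standalone host-side sketch (the helper name is made
// up). Negating an AND/OR tree of comparisons is the same as swapping AND<->OR
// and inverting each comparison predicate (De Morgan's laws).
#include <cassert>

static void deMorganNegationSketch() {
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B) {
      bool Negated = !((A < B) && (A != 0)); // xor(tree, true)
      bool Rewritten = (A >= B) || (A == 0); // inverted predicates, OR
      assert(Negated == Rewritten);
    }
}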
3499
3501 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3502 // Match (xor (and x, y), y) (or any of its commuted cases)
3503 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3504 Register &X = MatchInfo.first;
3505 Register &Y = MatchInfo.second;
3506 Register AndReg = MI.getOperand(1).getReg();
3507 Register SharedReg = MI.getOperand(2).getReg();
3508
3509 // Find a G_AND on either side of the G_XOR.
3510 // Look for one of
3511 //
3512 // (xor (and x, y), SharedReg)
3513 // (xor SharedReg, (and x, y))
3514 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3515 std::swap(AndReg, SharedReg);
3516 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3517 return false;
3518 }
3519
3520 // Only do this if we'll eliminate the G_AND.
3521 if (!MRI.hasOneNonDBGUse(AndReg))
3522 return false;
3523
3524 // We can combine if SharedReg is the same as either the LHS or RHS of the
3525 // G_AND.
3526 if (Y != SharedReg)
3527 std::swap(X, Y);
3528 return Y == SharedReg;
3529}
3530
3532 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3533 // Fold (xor (and x, y), y) -> (and (not x), y)
3534 Register X, Y;
3535 std::tie(X, Y) = MatchInfo;
3536 auto Not = Builder.buildNot(MRI.getType(X), X);
3538 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3539 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3540 MI.getOperand(2).setReg(Y);
3542}
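// Illustrative aside (not part of this file): the bitwise identity applied
// above, checked exhaustively on 8-bit values in a standalone host-side
// sketch (the helper name is made up): (x & y) ^ y == ~x & y.
#include <cassert>
#include <cstdint>

static void xorOfAndSketch() {
  for (unsigned X = 0; X <= 0xFF; ++X)
    for (unsigned Y = 0; Y <= 0xFF; ++Y)
      assert(static_cast<uint8_t>((X & Y) ^ Y) == static_cast<uint8_t>(~X & Y));
}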
3543
3545 auto &PtrAdd = cast<GPtrAdd>(MI);
3546 Register DstReg = PtrAdd.getReg(0);
3547 LLT Ty = MRI.getType(DstReg);
3549
3550 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3551 return false;
3552
3553 if (Ty.isPointer()) {
3554 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3555 return ConstVal && *ConstVal == 0;
3556 }
3557
3558 assert(Ty.isVector() && "Expecting a vector type");
3559 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3560 return isBuildVectorAllZeros(*VecMI, MRI);
3561}
3562
3564 auto &PtrAdd = cast<GPtrAdd>(MI);
3565 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3566 PtrAdd.eraseFromParent();
3567}
3568
3569/// The second source operand is known to be a power of 2.
3571 Register DstReg = MI.getOperand(0).getReg();
3572 Register Src0 = MI.getOperand(1).getReg();
3573 Register Pow2Src1 = MI.getOperand(2).getReg();
3574 LLT Ty = MRI.getType(DstReg);
3575
3576 // Fold (urem x, pow2) -> (and x, pow2-1)
3577 auto NegOne = Builder.buildConstant(Ty, -1);
3578 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3579 Builder.buildAnd(DstReg, Src0, Add);
3580 MI.eraseFromParent();
3581}
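// Illustrative aside (not part of this file): the identity the rewrite above
// relies on, as a standalone host-side sketch (the helper name is made up).
// For unsigned x and a power of two p, x % p == x & (p - 1).
#include <cassert>
#include <cstdint>

static void uremByPow2Sketch() {
  for (uint32_t X = 0; X < 1024; ++X)
    for (uint32_t P = 1; P <= 256; P <<= 1) // powers of two, including 1
      assert(X % P == (X & (P - 1)));
}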
3582
3584 unsigned &SelectOpNo) {
3585 Register LHS = MI.getOperand(1).getReg();
3586 Register RHS = MI.getOperand(2).getReg();
3587
3588 Register OtherOperandReg = RHS;
3589 SelectOpNo = 1;
3591
3592 // Don't do this unless the old select is going away. We want to eliminate the
3593 // binary operator, not replace a binop with a select.
3594 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3596 OtherOperandReg = LHS;
3597 SelectOpNo = 2;
3599 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3601 return false;
3602 }
3603
3604 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3605 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3606
3607 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3608 /*AllowFP*/ true,
3609 /*AllowOpaqueConstants*/ false))
3610 return false;
3611 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3612 /*AllowFP*/ true,
3613 /*AllowOpaqueConstants*/ false))
3614 return false;
3615
3616 unsigned BinOpcode = MI.getOpcode();
3617
3618 // We know that one of the operands is a select of constants. Now verify that
3619 // the other binary operator operand is either a constant, or we can handle a
3620 // variable.
3621 bool CanFoldNonConst =
3622 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3623 (isNullOrNullSplat(*SelectLHS, MRI) ||
3624 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3625 (isNullOrNullSplat(*SelectRHS, MRI) ||
3626 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3627 if (CanFoldNonConst)
3628 return true;
3629
3630 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3631 /*AllowFP*/ true,
3632 /*AllowOpaqueConstants*/ false);
3633}
3634
3635/// \p SelectOperand is the operand in binary operator \p MI that is the select
3636/// to fold.
3638 const unsigned &SelectOperand) {
3639 Register Dst = MI.getOperand(0).getReg();
3640 Register LHS = MI.getOperand(1).getReg();
3641 Register RHS = MI.getOperand(2).getReg();
3642 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3643
3644 Register SelectCond = Select->getOperand(1).getReg();
3645 Register SelectTrue = Select->getOperand(2).getReg();
3646 Register SelectFalse = Select->getOperand(3).getReg();
3647
3648 LLT Ty = MRI.getType(Dst);
3649 unsigned BinOpcode = MI.getOpcode();
3650
3651 Register FoldTrue, FoldFalse;
3652
3653 // We have a select-of-constants followed by a binary operator with a
3654 // constant. Eliminate the binop by pulling the constant math into the select.
3655 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3656 if (SelectOperand == 1) {
3657 // TODO: SelectionDAG verifies this actually constant folds before
3658 // committing to the combine.
3659
3660 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3661 FoldFalse =
3662 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3663 } else {
3664 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3665 FoldFalse =
3666 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3667 }
3668
3669 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3670 MI.eraseFromParent();
3671}
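// Illustrative aside (not part of this file): the scalar identity behind this
// fold, shown for G_ADD in a standalone host-side sketch (names made up); the
// same distribution over the select holds for any binary operator.
#include <cassert>
#include <cstdint>

static void foldBinOpIntoSelectSketch(bool Cond) {
  const int32_t CT = 7, CF = -3, CBO = 40;
  int32_t Before = (Cond ? CT : CF) + CBO;        // add (select Cond, CT, CF), CBO
  int32_t After = Cond ? (CT + CBO) : (CF + CBO); // select Cond, CT+CBO, CF+CBO
  assert(Before == After);
}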
3672
3673std::optional<SmallVector<Register, 8>>
3674CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3675 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3676 // We want to detect if Root is part of a tree which represents a bunch
3677 // of loads being merged into a larger load. We'll try to recognize patterns
3678 // like, for example:
3679 //
3680 // Reg Reg
3681 // \ /
3682 // OR_1 Reg
3683 // \ /
3684 // OR_2
3685 // \ Reg
3686 // .. /
3687 // Root
3688 //
3689 // Reg Reg Reg Reg
3690 // \ / \ /
3691 // OR_1 OR_2
3692 // \ /
3693 // \ /
3694 // ...
3695 // Root
3696 //
3697 // Each "Reg" may have been produced by a load + some arithmetic. This
3698 // function will save each of them.
3699 SmallVector<Register, 8> RegsToVisit;
3701
3702 // In the "worst" case, we're dealing with a load for each byte. So, there
3703 // are at most #bytes - 1 ORs.
3704 const unsigned MaxIter =
3705 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3706 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3707 if (Ors.empty())
3708 break;
3709 const MachineInstr *Curr = Ors.pop_back_val();
3710 Register OrLHS = Curr->getOperand(1).getReg();
3711 Register OrRHS = Curr->getOperand(2).getReg();
3712
3713 // In the combine, we want to eliminate the entire tree.
3714 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3715 return std::nullopt;
3716
3717 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3718 // something that may be a load + arithmetic.
3719 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3720 Ors.push_back(Or);
3721 else
3722 RegsToVisit.push_back(OrLHS);
3723 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3724 Ors.push_back(Or);
3725 else
3726 RegsToVisit.push_back(OrRHS);
3727 }
3728
3729 // We're going to try and merge each register into a wider power-of-2 type,
3730 // so we ought to have an even number of registers.
3731 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3732 return std::nullopt;
3733 return RegsToVisit;
3734}
3735
3736/// Helper function for findLoadOffsetsForLoadOrCombine.
3737///
3738/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3739/// and then moving that value into a specific byte offset.
3740///
3741/// e.g. x[i] << 24
3742///
3743/// \returns The load instruction and the byte offset it is moved into.
3744static std::optional<std::pair<GZExtLoad *, int64_t>>
3745matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3746 const MachineRegisterInfo &MRI) {
3747 assert(MRI.hasOneNonDBGUse(Reg) &&
3748 "Expected Reg to only have one non-debug use?");
3749 Register MaybeLoad;
3750 int64_t Shift;
3751 if (!mi_match(Reg, MRI,
3752 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3753 Shift = 0;
3754 MaybeLoad = Reg;
3755 }
3756
3757 if (Shift % MemSizeInBits != 0)
3758 return std::nullopt;
3759
3760 // TODO: Handle other types of loads.
3761 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3762 if (!Load)
3763 return std::nullopt;
3764
3765 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3766 return std::nullopt;
3767
3768 return std::make_pair(Load, Shift / MemSizeInBits);
3769}
3770
3771std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3772CombinerHelper::findLoadOffsetsForLoadOrCombine(
3774 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3775
3776 // Each load found for the pattern. There should be one for each RegsToVisit.
3778
3779 // The lowest index used in any load. (The lowest "i" for each x[i].)
3780 int64_t LowestIdx = INT64_MAX;
3781
3782 // The load which uses the lowest index.
3783 GZExtLoad *LowestIdxLoad = nullptr;
3784
3785 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3786 SmallSet<int64_t, 8> SeenIdx;
3787
3788 // Ensure each load is in the same MBB.
3789 // TODO: Support multiple MachineBasicBlocks.
3790 MachineBasicBlock *MBB = nullptr;
3791 const MachineMemOperand *MMO = nullptr;
3792
3793 // Earliest instruction-order load in the pattern.
3794 GZExtLoad *EarliestLoad = nullptr;
3795
3796 // Latest instruction-order load in the pattern.
3797 GZExtLoad *LatestLoad = nullptr;
3798
3799 // Base pointer which every load should share.
3801
3802 // We want to find a load for each register. Each load should have some
3803 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3804 // track of the load which uses the lowest index. Later, we will check if we
3805 // can use its pointer in the final, combined load.
3806 for (auto Reg : RegsToVisit) {
3807 // Find the load, and find the position that it will end up in (e.g. a
3808 // shifted value).
3809 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3810 if (!LoadAndPos)
3811 return std::nullopt;
3812 GZExtLoad *Load;
3813 int64_t DstPos;
3814 std::tie(Load, DstPos) = *LoadAndPos;
3815
3816 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3817 // it is difficult to check for stores/calls/etc between loads.
3818 MachineBasicBlock *LoadMBB = Load->getParent();
3819 if (!MBB)
3820 MBB = LoadMBB;
3821 if (LoadMBB != MBB)
3822 return std::nullopt;
3823
3824 // Make sure that the MachineMemOperands of every seen load are compatible.
3825 auto &LoadMMO = Load->getMMO();
3826 if (!MMO)
3827 MMO = &LoadMMO;
3828 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3829 return std::nullopt;
3830
3831 // Find out what the base pointer and index for the load is.
3832 Register LoadPtr;
3833 int64_t Idx;
3834 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3835 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3836 LoadPtr = Load->getOperand(1).getReg();
3837 Idx = 0;
3838 }
3839
3840 // Don't combine things like a[i], a[i] -> a bigger load.
3841 if (!SeenIdx.insert(Idx).second)
3842 return std::nullopt;
3843
3844 // Every load must share the same base pointer; don't combine things like:
3845 //
3846 // a[i], b[i + 1] -> a bigger load.
3847 if (!BasePtr.isValid())
3848 BasePtr = LoadPtr;
3849 if (BasePtr != LoadPtr)
3850 return std::nullopt;
3851
3852 if (Idx < LowestIdx) {
3853 LowestIdx = Idx;
3854 LowestIdxLoad = Load;
3855 }
3856
3857 // Keep track of the byte offset that this load ends up at. If we have seen
3858 // the byte offset, then stop here. We do not want to combine:
3859 //
3860 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3861 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3862 return std::nullopt;
3863 Loads.insert(Load);
3864
3865 // Keep track of the position of the earliest/latest loads in the pattern.
3866 // We will check that there are no load fold barriers between them later
3867 // on.
3868 //
3869 // FIXME: Is there a better way to check for load fold barriers?
3870 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3871 EarliestLoad = Load;
3872 if (!LatestLoad || dominates(*LatestLoad, *Load))
3873 LatestLoad = Load;
3874 }
3875
3876 // We found a load for each register. Let's check if each load satisfies the
3877 // pattern.
3878 assert(Loads.size() == RegsToVisit.size() &&
3879 "Expected to find a load for each register?");
3880 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3881 LatestLoad && "Expected at least two loads?");
3882
3883 // Check if there are any stores, calls, etc. between any of the loads. If
3884 // there are, then we can't safely perform the combine.
3885 //
3886 // MaxIter is chosen based off the (worst case) number of iterations it
3887 // typically takes to succeed in the LLVM test suite plus some padding.
3888 //
3889 // FIXME: Is there a better way to check for load fold barriers?
3890 const unsigned MaxIter = 20;
3891 unsigned Iter = 0;
3892 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3893 LatestLoad->getIterator())) {
3894 if (Loads.count(&MI))
3895 continue;
3896 if (MI.isLoadFoldBarrier())
3897 return std::nullopt;
3898 if (Iter++ == MaxIter)
3899 return std::nullopt;
3900 }
3901
3902 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3903}
3904
3906 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3907 assert(MI.getOpcode() == TargetOpcode::G_OR);
3908 MachineFunction &MF = *MI.getMF();
3909 // Assuming a little-endian target, transform:
3910 // s8 *a = ...
3911 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3912 // =>
3913 // s32 val = *((i32)a)
3914 //
3915 // s8 *a = ...
3916 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3917 // =>
3918 // s32 val = BSWAP(*((s32)a))
3919 Register Dst = MI.getOperand(0).getReg();
3920 LLT Ty = MRI.getType(Dst);
3921 if (Ty.isVector())
3922 return false;
3923
3924 // We need to combine at least two loads into this type. Since the smallest
3925 // possible load is into a byte, we need at least a 16-bit wide type.
3926 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3927 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3928 return false;
3929
3930 // Match a collection of non-OR instructions in the pattern.
3931 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3932 if (!RegsToVisit)
3933 return false;
3934
3935 // We have a collection of non-OR instructions. Figure out how wide each of
3936 // the small loads should be based off of the number of potential loads we
3937 // found.
3938 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3939 if (NarrowMemSizeInBits % 8 != 0)
3940 return false;
3941
3942 // Check if each register feeding into each OR is a load from the same
3943 // base pointer + some arithmetic.
3944 //
3945 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3946 //
3947 // Also verify that each of these ends up putting a[i] into the same memory
3948 // offset as a load into a wide type would.
3950 GZExtLoad *LowestIdxLoad, *LatestLoad;
3951 int64_t LowestIdx;
3952 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3953 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3954 if (!MaybeLoadInfo)
3955 return false;
3956 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3957
3958 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3959 // we found before, check if this corresponds to a big or little endian byte
3960 // pattern. If it does, then we can represent it using a load + possibly a
3961 // BSWAP.
3962 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3963 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3964 if (!IsBigEndian)
3965 return false;
3966 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3967 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3968 return false;
3969
3970 // Make sure that the load from the lowest index produces offset 0 in the
3971 // final value.
3972 //
3973 // This ensures that we won't combine something like this:
3974 //
3975 // load x[i] -> byte 2
3976 // load x[i+1] -> byte 0 ---> wide_load x[i]
3977 // load x[i+2] -> byte 1
3978 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3979 const unsigned ZeroByteOffset =
3980 *IsBigEndian
3981 ? bigEndianByteAt(NumLoadsInTy, 0)
3982 : littleEndianByteAt(NumLoadsInTy, 0);
3983 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3984 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3985 ZeroOffsetIdx->second != LowestIdx)
3986 return false;
3987
3988 // We will reuse the pointer from the load which ends up at byte offset 0. It
3989 // may not use index 0.
3990 Register Ptr = LowestIdxLoad->getPointerReg();
3991 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3992 LegalityQuery::MemDesc MMDesc(MMO);
3993 MMDesc.MemoryTy = Ty;
3995 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3996 return false;
3997 auto PtrInfo = MMO.getPointerInfo();
3998 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3999
4000 // Load must be allowed and fast on the target.
4002 auto &DL = MF.getDataLayout();
4003 unsigned Fast = 0;
4004 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4005 !Fast)
4006 return false;
4007
4008 MatchInfo = [=](MachineIRBuilder &MIB) {
4009 MIB.setInstrAndDebugLoc(*LatestLoad);
4010 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4011 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4012 if (NeedsBSwap)
4013 MIB.buildBSwap(Dst, LoadDst);
4014 };
4015 return true;
4016}
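// Illustrative aside (not part of this file): the byte-assembly pattern this
// combine recognizes, checked in a standalone host-side sketch (the helper
// name is made up). The OR tree of shifted byte loads reproduces what a single
// 32-bit load returns on a little-endian host; a big-endian host would need an
// extra byte swap, mirroring the G_BSWAP case above.
#include <cassert>
#include <cstdint>
#include <cstring>

static void loadOrCombineSketch() {
  const uint8_t A[4] = {0x11, 0x22, 0x33, 0x44};
  // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
  uint32_t Assembled = static_cast<uint32_t>(A[0]) |
                       static_cast<uint32_t>(A[1]) << 8 |
                       static_cast<uint32_t>(A[2]) << 16 |
                       static_cast<uint32_t>(A[3]) << 24;
  assert(Assembled == 0x44332211u);
  uint32_t Wide;
  std::memcpy(&Wide, A, sizeof(Wide)); // one 32-bit load of the same bytes
  // Equal on little-endian hosts; byte-swapped on big-endian hosts.
  assert(Wide == 0x44332211u || Wide == 0x11223344u);
}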
4017
4019 MachineInstr *&ExtMI) {
4020 auto &PHI = cast<GPhi>(MI);
4021 Register DstReg = PHI.getReg(0);
4022
4023 // TODO: Extending a vector may be expensive; don't do this until heuristics
4024 // are better.
4025 if (MRI.getType(DstReg).isVector())
4026 return false;
4027
4028 // Try to match a phi, whose only use is an extend.
4029 if (!MRI.hasOneNonDBGUse(DstReg))
4030 return false;
4031 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4032 switch (ExtMI->getOpcode()) {
4033 case TargetOpcode::G_ANYEXT:
4034 return true; // G_ANYEXT is usually free.
4035 case TargetOpcode::G_ZEXT:
4036 case TargetOpcode::G_SEXT:
4037 break;
4038 default:
4039 return false;
4040 }
4041
4042 // If the target is likely to fold this extend away, don't propagate.
4044 return false;
4045
4046 // We don't want to propagate the extends unless there's a good chance that
4047 // they'll be optimized in some way.
4048 // Collect the unique incoming values.
4050 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4051 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4052 switch (DefMI->getOpcode()) {
4053 case TargetOpcode::G_LOAD:
4054 case TargetOpcode::G_TRUNC:
4055 case TargetOpcode::G_SEXT:
4056 case TargetOpcode::G_ZEXT:
4057 case TargetOpcode::G_ANYEXT:
4058 case TargetOpcode::G_CONSTANT:
4059 InSrcs.insert(DefMI);
4060 // Don't try to propagate if there are too many places to create new
4061 // extends; chances are it'll increase code size.
4062 if (InSrcs.size() > 2)
4063 return false;
4064 break;
4065 default:
4066 return false;
4067 }
4068 }
4069 return true;
4070}
4071
4073 MachineInstr *&ExtMI) {
4074 auto &PHI = cast<GPhi>(MI);
4075 Register DstReg = ExtMI->getOperand(0).getReg();
4076 LLT ExtTy = MRI.getType(DstReg);
4077
4078 // Propagate the extension into each incoming register's block.
4079 // Use a SetVector here because PHIs can have duplicate edges, and we want
4080 // deterministic iteration order.
4083 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4084 auto SrcReg = PHI.getIncomingValue(I);
4085 auto *SrcMI = MRI.getVRegDef(SrcReg);
4086 if (!SrcMIs.insert(SrcMI))
4087 continue;
4088
4089 // Build an extend after each src inst.
4090 auto *MBB = SrcMI->getParent();
4091 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4092 if (InsertPt != MBB->end() && InsertPt->isPHI())
4093 InsertPt = MBB->getFirstNonPHI();
4094
4095 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4096 Builder.setDebugLoc(MI.getDebugLoc());
4097 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4098 OldToNewSrcMap[SrcMI] = NewExt;
4099 }
4100
4101 // Create a new phi with the extended inputs.
4103 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4104 NewPhi.addDef(DstReg);
4105 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4106 if (!MO.isReg()) {
4107 NewPhi.addMBB(MO.getMBB());
4108 continue;
4109 }
4110 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4111 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4112 }
4113 Builder.insertInstr(NewPhi);
4114 ExtMI->eraseFromParent();
4115}
4116
4118 Register &Reg) {
4119 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4120 // If we have a constant index, look for a G_BUILD_VECTOR source
4121 // and find the source register that the index maps to.
4122 Register SrcVec = MI.getOperand(1).getReg();
4123 LLT SrcTy = MRI.getType(SrcVec);
4124
4125 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4126 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4127 return false;
4128
4129 unsigned VecIdx = Cst->Value.getZExtValue();
4130
4131 // Check if we have a build_vector or build_vector_trunc with an optional
4132 // trunc in front.
4133 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4134 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4135 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4136 }
4137
4138 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4139 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4140 return false;
4141
4142 EVT Ty(getMVTForLLT(SrcTy));
4143 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4144 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4145 return false;
4146
4147 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4148 return true;
4149}
4150
4152 Register &Reg) {
4153 // Check the type of the register, since it may have come from a
4154 // G_BUILD_VECTOR_TRUNC.
4155 LLT ScalarTy = MRI.getType(Reg);
4156 Register DstReg = MI.getOperand(0).getReg();
4157 LLT DstTy = MRI.getType(DstReg);
4158
4159 if (ScalarTy != DstTy) {
4160 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4161 Builder.buildTrunc(DstReg, Reg);
4162 MI.eraseFromParent();
4163 return;
4164 }
4166}
4167
4170 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4171 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4172 // This combine tries to find build_vectors which have every source element
4173 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4174 // masked load scalarization are run late in the pipeline. There's already
4175 // a combine for a similar pattern starting from the extract, but that
4176 // doesn't attempt to do it if there are multiple uses of the build_vector,
4177 // which in this case is true. Starting the combine from the build_vector
4178 // feels more natural than trying to find sibling nodes of extracts.
4179 // E.g.
4180 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4181 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4182 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4183 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4184 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4185 // ==>
4186 // replace ext{1,2,3,4} with %s{1,2,3,4}
4187
4188 Register DstReg = MI.getOperand(0).getReg();
4189 LLT DstTy = MRI.getType(DstReg);
4190 unsigned NumElts = DstTy.getNumElements();
4191
4192 SmallBitVector ExtractedElts(NumElts);
4193 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4194 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4195 return false;
4196 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4197 if (!Cst)
4198 return false;
4199 unsigned Idx = Cst->getZExtValue();
4200 if (Idx >= NumElts)
4201 return false; // Out of range.
4202 ExtractedElts.set(Idx);
4203 SrcDstPairs.emplace_back(
4204 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4205 }
4206 // Match if every element was extracted.
4207 return ExtractedElts.all();
4208}
4209
4212 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4213 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4214 for (auto &Pair : SrcDstPairs) {
4215 auto *ExtMI = Pair.second;
4216 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4217 ExtMI->eraseFromParent();
4218 }
4219 MI.eraseFromParent();
4220}
4221
4223 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4224 applyBuildFnNoErase(MI, MatchInfo);
4225 MI.eraseFromParent();
4226}
4227
4229 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4230 MatchInfo(Builder);
4231}
4232
4234 BuildFnTy &MatchInfo) {
4235 assert(MI.getOpcode() == TargetOpcode::G_OR);
4236
4237 Register Dst = MI.getOperand(0).getReg();
4238 LLT Ty = MRI.getType(Dst);
4239 unsigned BitWidth = Ty.getScalarSizeInBits();
4240
4241 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4242 unsigned FshOpc = 0;
4243
4244 // Match (or (shl ...), (lshr ...)).
4245 if (!mi_match(Dst, MRI,
4246 // m_GOr() handles the commuted version as well.
4247 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4248 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4249 return false;
4250
4251 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4252 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4253 int64_t CstShlAmt, CstLShrAmt;
4254 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4255 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4256 CstShlAmt + CstLShrAmt == BitWidth) {
4257 FshOpc = TargetOpcode::G_FSHR;
4258 Amt = LShrAmt;
4259
4260 } else if (mi_match(LShrAmt, MRI,
4262 ShlAmt == Amt) {
4263 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4264 FshOpc = TargetOpcode::G_FSHL;
4265
4266 } else if (mi_match(ShlAmt, MRI,
4268 LShrAmt == Amt) {
4269 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4270 FshOpc = TargetOpcode::G_FSHR;
4271
4272 } else {
4273 return false;
4274 }
4275
4276 LLT AmtTy = MRI.getType(Amt);
4277 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4278 return false;
4279
4280 MatchInfo = [=](MachineIRBuilder &B) {
4281 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4282 };
4283 return true;
4284}
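// Illustrative aside (not part of this file): what a 32-bit funnel shift
// computes and why the OR-of-shifts pattern maps onto it, as a standalone
// host-side sketch (the helper names are made up). For an amount in (0, 32),
// fshl(x, y, amt) == (x << amt) | (y >> (32 - amt)).
#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t X, uint32_t Y, unsigned Amt) {
  // Concatenate X:Y into 64 bits, shift left, keep the high 32 bits.
  uint64_t Concat = (static_cast<uint64_t>(X) << 32) | Y;
  return static_cast<uint32_t>((Concat << Amt) >> 32);
}

static void funnelShiftSketch() {
  const uint32_t X = 0x12345678, Y = 0x9ABCDEF0;
  for (unsigned Amt = 1; Amt < 32; ++Amt)
    assert(fshl32(X, Y, Amt) == ((X << Amt) | (Y >> (32 - Amt))));
}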
4285
4286/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4288 unsigned Opc = MI.getOpcode();
4289 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4290 Register X = MI.getOperand(1).getReg();
4291 Register Y = MI.getOperand(2).getReg();
4292 if (X != Y)
4293 return false;
4294 unsigned RotateOpc =
4295 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4296 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4297}
4298
4300 unsigned Opc = MI.getOpcode();
4301 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4302 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4304 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4305 : TargetOpcode::G_ROTR));
4306 MI.removeOperand(2);
4308}
4309
4310// Fold (rot x, c) -> (rot x, c % BitSize)
4312 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4313 MI.getOpcode() == TargetOpcode::G_ROTR);
4314 unsigned Bitsize =
4315 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4316 Register AmtReg = MI.getOperand(2).getReg();
4317 bool OutOfRange = false;
4318 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4319 if (auto *CI = dyn_cast<ConstantInt>(C))
4320 OutOfRange |= CI->getValue().uge(Bitsize);
4321 return true;
4322 };
4323 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4324}
4325
4327 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4328 MI.getOpcode() == TargetOpcode::G_ROTR);
4329 unsigned Bitsize =
4330 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4331 Register Amt = MI.getOperand(2).getReg();
4332 LLT AmtTy = MRI.getType(Amt);
4333 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4334 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4336 MI.getOperand(2).setReg(Amt);
4338}
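// Illustrative aside (not part of this file): rotation amounts only matter
// modulo the bit width, which is why an out-of-range amount can be reduced
// with a urem. Standalone host-side sketch; the helper names are made up.
#include <cassert>
#include <cstdint>

static uint32_t rotlByOnes(uint32_t X, unsigned Amt) {
  for (unsigned I = 0; I < Amt; ++I)
    X = (X << 1) | (X >> 31); // one-bit rotate, repeated Amt times
  return X;
}

static uint32_t rotlReduced(uint32_t X, unsigned Amt) {
  Amt %= 32; // the reduction performed above
  return Amt ? (X << Amt) | (X >> (32 - Amt)) : X;
}

static void rotateOutOfRangeSketch() {
  const uint32_t X = 0xDEADBEEF;
  for (unsigned Amt = 0; Amt < 256; ++Amt)
    assert(rotlByOnes(X, Amt) == rotlReduced(X, Amt));
}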
4339
4341 int64_t &MatchInfo) {
4342 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4343 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4344
4345 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4346 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4347 // KnownBits on the LHS in two cases:
4348 //
4349 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4350 // we cannot do any transforms so we can safely bail out early.
4351 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4352 // >=0.
4353 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4354 if (KnownRHS.isUnknown())
4355 return false;
4356
4357 std::optional<bool> KnownVal;
4358 if (KnownRHS.isZero()) {
4359 // ? uge 0 -> always true
4360 // ? ult 0 -> always false
4361 if (Pred == CmpInst::ICMP_UGE)
4362 KnownVal = true;
4363 else if (Pred == CmpInst::ICMP_ULT)
4364 KnownVal = false;
4365 }
4366
4367 if (!KnownVal) {
4368 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4369 switch (Pred) {
4370 default:
4371 llvm_unreachable("Unexpected G_ICMP predicate?");
4372 case CmpInst::ICMP_EQ:
4373 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4374 break;
4375 case CmpInst::ICMP_NE:
4376 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4377 break;
4378 case CmpInst::ICMP_SGE:
4379 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4380 break;
4381 case CmpInst::ICMP_SGT:
4382 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4383 break;
4384 case CmpInst::ICMP_SLE:
4385 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4386 break;
4387 case CmpInst::ICMP_SLT:
4388 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4389 break;
4390 case CmpInst::ICMP_UGE:
4391 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4392 break;
4393 case CmpInst::ICMP_UGT:
4394 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4395 break;
4396 case CmpInst::ICMP_ULE:
4397 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4398 break;
4399 case CmpInst::ICMP_ULT:
4400 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4401 break;
4402 }
4403 }
4404
4405 if (!KnownVal)
4406 return false;
4407 MatchInfo =
4408 *KnownVal
4410 /*IsVector = */
4411 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4412 /* IsFP = */ false)
4413 : 0;
4414 return true;
4415}
4416
4418 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4419 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4420 // Given:
4421 //
4422 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4423 // %cmp = G_ICMP ne %x, 0
4424 //
4425 // Or:
4426 //
4427 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4428 // %cmp = G_ICMP eq %x, 1
4429 //
4430 // We can replace %cmp with %x assuming true is 1 on the target.
4431 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4432 if (!CmpInst::isEquality(Pred))
4433 return false;
4434 Register Dst = MI.getOperand(0).getReg();
4435 LLT DstTy = MRI.getType(Dst);
4437 /* IsFP = */ false) != 1)
4438 return false;
4439 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4440 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4441 return false;
4442 Register LHS = MI.getOperand(2).getReg();
4443 auto KnownLHS = KB->getKnownBits(LHS);
4444 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4445 return false;
4446 // Make sure replacing Dst with the LHS is a legal operation.
4447 LLT LHSTy = MRI.getType(LHS);
4448 unsigned LHSSize = LHSTy.getSizeInBits();
4449 unsigned DstSize = DstTy.getSizeInBits();
4450 unsigned Op = TargetOpcode::COPY;
4451 if (DstSize != LHSSize)
4452 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4453 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4454 return false;
4455 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4456 return true;
4457}
4458
4459// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4461 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4462 assert(MI.getOpcode() == TargetOpcode::G_AND);
4463
4464 // Ignore vector types to simplify matching the two constants.
4465 // TODO: do this for vectors and scalars via a demanded bits analysis.
4466 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4467 if (Ty.isVector())
4468 return false;
4469
4470 Register Src;
4471 Register AndMaskReg;
4472 int64_t AndMaskBits;
4473 int64_t OrMaskBits;
4474 if (!mi_match(MI, MRI,
4475 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4476 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4477 return false;
4478
4479 // Check if OrMask could turn on any bits in Src.
4480 if (AndMaskBits & OrMaskBits)
4481 return false;
4482
4483 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4485 // Canonicalize the result to have the constant on the RHS.
4486 if (MI.getOperand(1).getReg() == AndMaskReg)
4487 MI.getOperand(2).setReg(AndMaskReg);
4488 MI.getOperand(1).setReg(Src);
4490 };
4491 return true;
4492}
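// Illustrative aside (not part of this file): when the OR mask and the AND
// mask share no bits, the OR cannot affect any bit that survives the AND, so
// it can be dropped. Checked exhaustively on 8-bit values in a standalone
// host-side sketch (the helper name is made up).
#include <cassert>
#include <cstdint>

static void andOrDisjointMaskSketch() {
  const uint8_t C1 = 0x0F, C2 = 0xF0; // disjoint: C1 & C2 == 0
  for (unsigned X = 0; X <= 0xFF; ++X)
    assert(((X | C1) & C2) == (X & C2));
}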
4493
4494/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4496 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4497 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4498 Register Dst = MI.getOperand(0).getReg();
4499 Register Src = MI.getOperand(1).getReg();
4500 LLT Ty = MRI.getType(Src);
4502 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4503 return false;
4504 int64_t Width = MI.getOperand(2).getImm();
4505 Register ShiftSrc;
4506 int64_t ShiftImm;
4507 if (!mi_match(
4508 Src, MRI,
4509 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4510 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4511 return false;
4512 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4513 return false;
4514
4515 MatchInfo = [=](MachineIRBuilder &B) {
4516 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4517 auto Cst2 = B.buildConstant(ExtractTy, Width);
4518 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4519 };
4520 return true;
4521}
4522
4523/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4525 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4526 assert(MI.getOpcode() == TargetOpcode::G_AND);
4527 Register Dst = MI.getOperand(0).getReg();
4528 LLT Ty = MRI.getType(Dst);
4530 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4531 return false;
4532
4533 int64_t AndImm, LSBImm;
4534 Register ShiftSrc;
4535 const unsigned Size = Ty.getScalarSizeInBits();
4536 if (!mi_match(MI.getOperand(0).getReg(), MRI,
4537 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4538 m_ICst(AndImm))))
4539 return false;
4540
4541 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4542 auto MaybeMask = static_cast<uint64_t>(AndImm);
4543 if (MaybeMask & (MaybeMask + 1))
4544 return false;
4545
4546 // LSB must fit within the register.
4547 if (static_cast<uint64_t>(LSBImm) >= Size)
4548 return false;
4549
4550 uint64_t Width = APInt(Size, AndImm).countr_one();
4551 MatchInfo = [=](MachineIRBuilder &B) {
4552 auto WidthCst = B.buildConstant(ExtractTy, Width);
4553 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4554 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4555 };
4556 return true;
4557}
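// Illustrative aside (not part of this file): what the emitted G_UBFX computes
// and why "(x >> lsb) & low-bit-mask" is the same extraction, as a standalone
// host-side sketch (the helper names are made up). ubfx(x, lsb, width) yields
// the width bits of x starting at bit lsb, zero-extended.
#include <cassert>
#include <cstdint>

static uint32_t ubfx32(uint32_t X, unsigned Lsb, unsigned Width) {
  // Assumes 0 < Width and Lsb + Width <= 32. Move the field to the top, then
  // logically shift it back down so only the field remains.
  return (X << (32 - Lsb - Width)) >> (32 - Width);
}

static void shiftAndMaskToUbfxSketch() {
  const uint32_t X = 0xCAFEF00D;
  uint32_t Masked = (X >> 4) & 0xFFF; // the matched srl + low-bits mask
  assert(Masked == ubfx32(X, /*Lsb=*/4, /*Width=*/12));
}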
4558
4560 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4561 const unsigned Opcode = MI.getOpcode();
4562 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4563
4564 const Register Dst = MI.getOperand(0).getReg();
4565
4566 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4567 ? TargetOpcode::G_SBFX
4568 : TargetOpcode::G_UBFX;
4569
4570 // Check if the type we would use for the extract is legal
4571 LLT Ty = MRI.getType(Dst);
4573 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4574 return false;
4575
4576 Register ShlSrc;
4577 int64_t ShrAmt;
4578 int64_t ShlAmt;
4579 const unsigned Size = Ty.getScalarSizeInBits();
4580
4581 // Try to match shr (shl x, c1), c2
4582 if (!mi_match(Dst, MRI,
4583 m_BinOp(Opcode,
4584 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4585 m_ICst(ShrAmt))))
4586 return false;
4587
4588 // Make sure that the shift sizes can fit a bitfield extract
4589 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4590 return false;
4591
4592 // Skip this combine if the G_SEXT_INREG combine could handle it
4593 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4594 return false;
4595
4596 // Calculate start position and width of the extract
4597 const int64_t Pos = ShrAmt - ShlAmt;
4598 const int64_t Width = Size - ShrAmt;
4599
4600 MatchInfo = [=](MachineIRBuilder &B) {
4601 auto WidthCst = B.buildConstant(ExtractTy, Width);
4602 auto PosCst = B.buildConstant(ExtractTy, Pos);
4603 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4604 };
4605 return true;
4606}
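// Illustrative aside (not part of this file): the unsigned flavour of the
// shift-pair pattern above, as a standalone host-side sketch (the helper name
// is made up). A shl by ShlAmt followed by a lshr by ShrAmt extracts
// Size - ShrAmt bits starting at position ShrAmt - ShlAmt.
#include <cassert>
#include <cstdint>

static void shiftPairToBitfieldExtractSketch() {
  const uint32_t X = 0xCAFEF00D;
  const unsigned ShlAmt = 3, ShrAmt = 20, Size = 32;
  uint32_t Shifted = (X << ShlAmt) >> ShrAmt;
  const unsigned Pos = ShrAmt - ShlAmt; // 17
  const unsigned Width = Size - ShrAmt; // 12
  uint32_t Extracted = (X >> Pos) & ((1u << Width) - 1);
  assert(Shifted == Extracted);
}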
4607
4609 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4610 const unsigned Opcode = MI.getOpcode();
4611 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4612
4613 const Register Dst = MI.getOperand(0).getReg();
4614 LLT Ty = MRI.getType(Dst);
4616 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4617 return false;
4618
4619 // Try to match shr (and x, c1), c2
4620 Register AndSrc;
4621 int64_t ShrAmt;
4622 int64_t SMask;
4623 if (!mi_match(Dst, MRI,
4624 m_BinOp(Opcode,
4625 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4626 m_ICst(ShrAmt))))
4627 return false;
4628
4629 const unsigned Size = Ty.getScalarSizeInBits();
4630 if (ShrAmt < 0 || ShrAmt >= Size)
4631 return false;
4632
4633 // If the shift subsumes the mask, emit the 0 directly.
4634 if (0 == (SMask >> ShrAmt)) {
4635 MatchInfo = [=](MachineIRBuilder &B) {
4636 B.buildConstant(Dst, 0);
4637 };
4638 return true;
4639 }
4640
4641 // Check that ubfx can do the extraction, with no holes in the mask.
4642 uint64_t UMask = SMask;
4643 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4644 UMask &= maskTrailingOnes<uint64_t>(Size);
4645 if (!isMask_64(UMask))
4646 return false;
4647
4648 // Calculate start position and width of the extract.
4649 const int64_t Pos = ShrAmt;
4650 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4651
4652 // It's preferable to keep the shift, rather than form G_SBFX.
4653 // TODO: remove the G_AND via demanded bits analysis.
4654 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4655 return false;
4656
4657 MatchInfo = [=](MachineIRBuilder &B) {
4658 auto WidthCst = B.buildConstant(ExtractTy, Width);
4659 auto PosCst = B.buildConstant(ExtractTy, Pos);
4660 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4661 };
4662 return true;
4663}
4664
4665bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4666 MachineInstr &MI) {
4667 auto &PtrAdd = cast<GPtrAdd>(MI);
4668
4669 Register Src1Reg = PtrAdd.getBaseReg();
4670 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4671 if (!Src1Def)
4672 return false;
4673
4674 Register Src2Reg = PtrAdd.getOffsetReg();
4675
4676 if (MRI.hasOneNonDBGUse(Src1Reg))
4677 return false;
4678
4679 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4680 if (!C1)
4681 return false;
4682 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4683 if (!C2)
4684 return false;
4685
4686 const APInt &C1APIntVal = *C1;
4687 const APInt &C2APIntVal = *C2;
4688 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4689
4690 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4691 // This combine may end up running before ptrtoint/inttoptr combines
4692 // manage to eliminate redundant conversions, so try to look through them.
4693 MachineInstr *ConvUseMI = &UseMI;
4694 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4695 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4696 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4697 Register DefReg = ConvUseMI->getOperand(0).getReg();
4698 if (!MRI.hasOneNonDBGUse(DefReg))
4699 break;
4700 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4701 ConvUseOpc = ConvUseMI->getOpcode();
4702 }
4703 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4704 if (!LdStMI)
4705 continue;
4706 // Is x[offset2] already not a legal addressing mode? If so then
4707 // reassociating the constants breaks nothing (we test offset2 because
4708 // that's the one we hope to fold into the load or store).
4710 AM.HasBaseReg = true;
4711 AM.BaseOffs = C2APIntVal.getSExtValue();
4712 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4713 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4714 PtrAdd.getMF()->getFunction().getContext());
4715 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4716 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4717 AccessTy, AS))
4718 continue;
4719
4720 // Would x[offset1+offset2] still be a legal addressing mode?
4721 AM.BaseOffs = CombinedValue;
4722 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4723 AccessTy, AS))
4724 return true;
4725 }
4726
4727 return false;
4728}
4729
4731 MachineInstr *RHS,
4732 BuildFnTy &MatchInfo) {
4733 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4734 Register Src1Reg = MI.getOperand(1).getReg();
4735 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4736 return false;
4737 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4738 if (!C2)
4739 return false;
4740
4741 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4742 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4743
4744 auto NewBase =
4745 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4747 MI.getOperand(1).setReg(NewBase.getReg(0));
4748 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4750 };
4751 return !reassociationCanBreakAddressingModePattern(MI);
4752}
4753
4755 MachineInstr *LHS,
4756 MachineInstr *RHS,
4757 BuildFnTy &MatchInfo) {
4758 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4759 // if and only if (G_PTR_ADD X, C) has one use.
4760 Register LHSBase;
4761 std::optional<ValueAndVReg> LHSCstOff;
4762 if (!mi_match(MI.getBaseReg(), MRI,
4763 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4764 return false;
4765
4766 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4767 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4768 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4769 // before its def. Sink the inner add down to just before the outer PTR_ADD
4770 // to ensure this doesn't happen.
4771 LHSPtrAdd->moveBefore(&MI);
4772 Register RHSReg = MI.getOffsetReg();
4774 // Reusing the constant vreg directly can cause a type mismatch if it comes from an extend/trunc.
4774 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4776 MI.getOperand(2).setReg(NewCst.getReg(0));
4778 Observer.changingInstr(*LHSPtrAdd);
4779 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4780 Observer.changedInstr(*LHSPtrAdd);
4781 };
4782 return !reassociationCanBreakAddressingModePattern(MI);
4783}
4784
4786 MachineInstr *LHS,
4787 MachineInstr *RHS,
4788 BuildFnTy &MatchInfo) {
4789 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4790 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4791 if (!LHSPtrAdd)
4792 return false;
4793
4794 Register Src2Reg = MI.getOperand(2).getReg();
4795 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4796 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4797 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4798 if (!C1)
4799 return false;
4800 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4801 if (!C2)
4802 return false;
4803
4804 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4805 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4807 MI.getOperand(1).setReg(LHSSrc1);
4808 MI.getOperand(2).setReg(NewCst.getReg(0));
4810 };
4811 return !reassociationCanBreakAddressingModePattern(MI);
4812}
4813
4815 BuildFnTy &MatchInfo) {
4816 auto &PtrAdd = cast<GPtrAdd>(MI);
4817 // We're trying to match a few pointer computation patterns here for
4818 // re-association opportunities.
4819 // 1) Isolating a constant operand to be on the RHS, e.g.:
4820 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4821 //
4822 // 2) Folding two constants in each sub-tree as long as such folding
4823 // doesn't break a legal addressing mode.
4824 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4825 //
4826 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4827 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4828 // iff (G_PTR_ADD X, C) has one use.
4829 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4830 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4831
4832 // Try to match example 2.
4833 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4834 return true;
4835
4836 // Try to match example 3.
4837 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4838 return true;
4839
4840 // Try to match example 1.
4841 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4842 return true;
4843
4844 return false;
4845}
4847 Register OpLHS, Register OpRHS,
4848 BuildFnTy &MatchInfo) {
4849 LLT OpRHSTy = MRI.getType(OpRHS);
4850 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
4851
4852 if (OpLHSDef->getOpcode() != Opc)
4853 return false;
4854
4855 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
4856 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
4857 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
4858
4859 // If the inner op is (X op C), pull the constant out so it can be folded with
4860 // other constants in the expression tree. Folding is not guaranteed so we
4861 // might have (C1 op C2). In that case do not pull a constant out because it
4862 // won't help and can lead to infinite loops.
4865 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
4866 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
4867 MatchInfo = [=](MachineIRBuilder &B) {
4868 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
4869 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
4870 };
4871 return true;
4872 }
4873 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
4874 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
4875 // iff (op x, c1) has one use
4876 MatchInfo = [=](MachineIRBuilder &B) {
4877 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
4878 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
4879 };
4880 return true;
4881 }
4882 }
4883
4884 return false;
4885}
4886
4888 BuildFnTy &MatchInfo) {
4889 // We don't check if the reassociation will break a legal addressing mode
4890 // here since pointer arithmetic is handled by G_PTR_ADD.
4891 unsigned Opc = MI.getOpcode();
4892 Register DstReg = MI.getOperand(0).getReg();
4893 Register LHSReg = MI.getOperand(1).getReg();
4894 Register RHSReg = MI.getOperand(2).getReg();
4895
4896 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
4897 return true;
4898 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
4899 return true;
4900 return false;
4901}
4902
4904 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4905 Register SrcOp = MI.getOperand(1).getReg();
4906
4907 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
4908 MatchInfo = *MaybeCst;
4909 return true;
4910 }
4911
4912 return false;
4913}
4914
4916 Register Op1 = MI.getOperand(1).getReg();
4917 Register Op2 = MI.getOperand(2).getReg();
4918 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4919 if (!MaybeCst)
4920 return false;
4921 MatchInfo = *MaybeCst;
4922 return true;
4923}
4924
4926 Register Op1 = MI.getOperand(1).getReg();
4927 Register Op2 = MI.getOperand(2).getReg();
4928 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
4929 if (!MaybeCst)
4930 return false;
4931 MatchInfo =
4932 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
4933 return true;
4934}
4935
4937 ConstantFP *&MatchInfo) {
4938 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
4939 MI.getOpcode() == TargetOpcode::G_FMAD);
4940 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
4941
4942 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
4943 if (!Op3Cst)
4944 return false;
4945
4946 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
4947 if (!Op2Cst)
4948 return false;
4949
4950 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
4951 if (!Op1Cst)
4952 return false;
4953
4954 APFloat Op1F = Op1Cst->getValueAPF();
4955 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
4957 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
4958 return true;
4959}
4960
4962 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4963 // Look for a binop feeding into an AND with a mask:
4964 //
4965 // %add = G_ADD %lhs, %rhs
4966 // %and = G_AND %add, 000...11111111
4967 //
4968 // Check if it's possible to perform the binop at a narrower width and zext
4969 // back to the original width like so:
4970 //
4971 // %narrow_lhs = G_TRUNC %lhs
4972 // %narrow_rhs = G_TRUNC %rhs
4973 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4974 // %new_add = G_ZEXT %narrow_add
4975 // %and = G_AND %new_add, 000...11111111
4976 //
4977 // This can allow later combines to eliminate the G_AND if it turns out
4978 // that the mask is irrelevant.
4979 assert(MI.getOpcode() == TargetOpcode::G_AND);
4980 Register Dst = MI.getOperand(0).getReg();
4981 Register AndLHS = MI.getOperand(1).getReg();
4982 Register AndRHS = MI.getOperand(2).getReg();
4983 LLT WideTy = MRI.getType(Dst);
4984
4985 // If the potential binop has more than one use, then it's possible that one
4986 // of those uses will need its full width.
4987 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
4988 return false;
4989
4990 // Check if the LHS feeding the AND is impacted by the high bits that we're
4991 // masking out.
4992 //
4993 // e.g. for 64-bit x, y:
4994 //
4995 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
4996 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
4997 if (!LHSInst)
4998 return false;
4999 unsigned LHSOpc = LHSInst->getOpcode();
5000 switch (LHSOpc) {
5001 default:
5002 return false;
5003 case TargetOpcode::G_ADD:
5004 case TargetOpcode::G_SUB:
5005 case TargetOpcode::G_MUL:
5006 case TargetOpcode::G_AND:
5007 case TargetOpcode::G_OR:
5008 case TargetOpcode::G_XOR:
5009 break;
5010 }
5011
5012 // Find the mask on the RHS.
5013 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5014 if (!Cst)
5015 return false;
5016 auto Mask = Cst->Value;
5017 if (!Mask.isMask())
5018 return false;
5019
5020 // No point in combining if there's nothing to truncate.
5021 unsigned NarrowWidth = Mask.countr_one();
5022 if (NarrowWidth == WideTy.getSizeInBits())
5023 return false;
5024 LLT NarrowTy = LLT::scalar(NarrowWidth);
5025
5026 // Check if adding the zext + truncates could be harmful.
5027 auto &MF = *MI.getMF();
5028 const auto &TLI = getTargetLowering();
5029 LLVMContext &Ctx = MF.getFunction().getContext();
5030 auto &DL = MF.getDataLayout();
5031 if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
5032 !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
5033 return false;
5034 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5035 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5036 return false;
5037 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5038 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5039 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5040 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5041 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5042 auto NarrowBinOp =
5043 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5044 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5046 MI.getOperand(1).setReg(Ext.getReg(0));
5048 };
5049 return true;
5050}
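// Illustrative aside (not part of this file): the low bits of an add depend
// only on the low bits of its operands, so the add can be done at the narrow
// width and zero-extended before the mask. Standalone host-side sketch with a
// made-up helper name, for the 64-bit-add-masked-to-16-bits case.
#include <cassert>
#include <cstdint>

static void narrowBinopFeedingAndSketch(uint64_t X, uint64_t Y) {
  uint64_t Wide = (X + Y) & 0xFFFF; // add_64 then mask
  uint16_t Narrow = static_cast<uint16_t>(static_cast<uint16_t>(X) +
                                          static_cast<uint16_t>(Y));
  assert(Wide == (static_cast<uint64_t>(Narrow) & 0xFFFF)); // zext(add_16) then mask
}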
5051
5053 unsigned Opc = MI.getOpcode();
5054 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5055
5056 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5057 return false;
5058
5059 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5061 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5062 : TargetOpcode::G_SADDO;
5063 MI.setDesc(Builder.getTII().get(NewOpc));
5064 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5066 };
5067 return true;
5068}
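// Illustrative aside (not part of this file): multiplying by two overflows
// exactly when adding a value to itself does, and both produce the same
// result, which is what lets G_*MULO by 2 become G_*ADDO. Standalone host-side
// sketch; the GCC/Clang overflow builtins are used only for the check and the
// helper name is made up.
#include <cassert>
#include <cstdint>

static void mulOBy2Sketch(int32_t X) {
  int32_t MulRes, AddRes;
  bool MulOv = __builtin_mul_overflow(X, 2, &MulRes);
  bool AddOv = __builtin_add_overflow(X, X, &AddRes);
  assert(MulOv == AddOv);
  assert(MulRes == AddRes);
}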
5069
5071 // (G_*MULO x, 0) -> 0 + no carry out
5072 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5073 MI.getOpcode() == TargetOpcode::G_SMULO);
5074 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5075 return false;
5076 Register Dst = MI.getOperand(0).getReg();
5077 Register Carry = MI.getOperand(1).getReg();
5080 return false;
5081 MatchInfo = [=](MachineIRBuilder &B) {
5082 B.buildConstant(Dst, 0);
5083 B.buildConstant(Carry, 0);
5084 };
5085 return true;
5086}
5087
5089 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5090 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5091 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5092 MI.getOpcode() == TargetOpcode::G_SADDE ||
5093 MI.getOpcode() == TargetOpcode::G_USUBE ||
5094 MI.getOpcode() == TargetOpcode::G_SSUBE);
5095 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5096 return false;
5097 MatchInfo = [&](MachineIRBuilder &B) {
5098 unsigned NewOpcode;
5099 switch (MI.getOpcode()) {
5100 case TargetOpcode::G_UADDE:
5101 NewOpcode = TargetOpcode::G_UADDO;
5102 break;
5103 case TargetOpcode::G_SADDE:
5104 NewOpcode = TargetOpcode::G_SADDO;
5105 break;
5106 case TargetOpcode::G_USUBE:
5107 NewOpcode = TargetOpcode::G_USUBO;
5108 break;
5109 case TargetOpcode::G_SSUBE:
5110 NewOpcode = TargetOpcode::G_SSUBO;
5111 break;
5112 }
5114 MI.setDesc(B.getTII().get(NewOpcode));
5115 MI.removeOperand(4);
5117 };
5118 return true;
5119}
5120
5122 BuildFnTy &MatchInfo) {
5123 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5124 Register Dst = MI.getOperand(0).getReg();
5125 // (x + y) - z -> x (if y == z)
5126 // (x + y) - z -> y (if x == z)
5127 Register X, Y, Z;
5128 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5129 Register ReplaceReg;
5130 int64_t CstX, CstY;
5131    if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5132                   mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
5133      ReplaceReg = X;
5134    else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5135                        mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5136      ReplaceReg = Y;
5137 if (ReplaceReg) {
5138 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5139 return true;
5140 }
5141 }
5142
5143 // x - (y + z) -> 0 - y (if x == z)
5144 // x - (y + z) -> 0 - z (if x == y)
5145 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5146 Register ReplaceReg;
5147 int64_t CstX;
5148    if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5149                   mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5150      ReplaceReg = Y;
5151    else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5152                        mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
5153      ReplaceReg = Z;
5154 if (ReplaceReg) {
5155 MatchInfo = [=](MachineIRBuilder &B) {
5156 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5157 B.buildSub(Dst, Zero, ReplaceReg);
5158 };
5159 return true;
5160 }
5161 }
5162 return false;
5163}
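// Worked example (illustrative): with %t = G_ADD %x, %y,
//   G_SUB %t, %y   folds to a copy of %x      ((x + y) - z with y == z), and
//   G_SUB %x, %t   folds to G_SUB 0, %y       (x - (y + z) with x == y),
// which are exactly the two pattern groups handled above.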
5164
5165MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
5166  assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5167 auto &UDiv = cast<GenericMachineInstr>(MI);
5168 Register Dst = UDiv.getReg(0);
5169 Register LHS = UDiv.getReg(1);
5170 Register RHS = UDiv.getReg(2);
5171 LLT Ty = MRI.getType(Dst);
5172 LLT ScalarTy = Ty.getScalarType();
5173 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5174  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5175  LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5176
5177 unsigned KnownLeadingZeros =
5178      KB ? KB->getKnownBits(LHS).countMinLeadingZeros() : 0;
5179  auto &MIB = Builder;
5180
5181 bool UseNPQ = false;
5182 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5183
5184 auto BuildUDIVPattern = [&](const Constant *C) {
5185 auto *CI = cast<ConstantInt>(C);
5186 const APInt &Divisor = CI->getValue();
5187
5188 bool SelNPQ = false;
5189 APInt Magic(Divisor.getBitWidth(), 0);
5190 unsigned PreShift = 0, PostShift = 0;
5191
5192 // Magic algorithm doesn't work for division by 1. We need to emit a select
5193 // at the end.
5194 // TODO: Use undef values for divisor of 1.
5195 if (!Divisor.isOne()) {
5196
5197 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5198      // in the dividend exceed the leading zeros for the divisor.
5199      UnsignedDivisionByConstantInfo magics =
5200          UnsignedDivisionByConstantInfo::get(
5201              Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5202
5203 Magic = std::move(magics.Magic);
5204
5205 assert(magics.PreShift < Divisor.getBitWidth() &&
5206 "We shouldn't generate an undefined shift!");
5207 assert(magics.PostShift < Divisor.getBitWidth() &&
5208 "We shouldn't generate an undefined shift!");
5209 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5210 PreShift = magics.PreShift;
5211 PostShift = magics.PostShift;
5212 SelNPQ = magics.IsAdd;
5213 }
5214
5215 PreShifts.push_back(
5216 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5217 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5218 NPQFactors.push_back(
5219 MIB.buildConstant(ScalarTy,
5220 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5221 : APInt::getZero(EltBits))
5222 .getReg(0));
5223 PostShifts.push_back(
5224 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5225 UseNPQ |= SelNPQ;
5226 return true;
5227 };
5228
5229 // Collect the shifts/magic values from each element.
5230 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5231 (void)Matched;
5232 assert(Matched && "Expected unary predicate match to succeed");
5233
5234 Register PreShift, PostShift, MagicFactor, NPQFactor;
5235 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5236 if (RHSDef) {
5237 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5238 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5239 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5240 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5241 } else {
5243 "Non-build_vector operation should have been a scalar");
5244 PreShift = PreShifts[0];
5245 MagicFactor = MagicFactors[0];
5246 PostShift = PostShifts[0];
5247 }
5248
5249 Register Q = LHS;
5250 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5251
5252 // Multiply the numerator (operand 0) by the magic value.
5253 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5254
5255 if (UseNPQ) {
5256 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5257
5258 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5259 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5260 if (Ty.isVector())
5261 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5262 else
5263 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5264
5265 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5266 }
5267
5268 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5269 auto One = MIB.buildConstant(Ty, 1);
5270 auto IsOne = MIB.buildICmp(
5271      CmpInst::Predicate::ICMP_EQ,
5272      Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5273 return MIB.buildSelect(Ty, IsOne, LHS, Q);
5274}
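// Worked example of the expansion above (illustrative; assumes a 32-bit scalar
// udiv by the constant 3): UnsignedDivisionByConstantInfo yields
// Magic = 0xAAAAAAAB (= ceil(2^33 / 3)), PreShift = 0, PostShift = 1 and
// IsAdd = false, so the emitted sequence is
//   %q = G_UMULH %x, 0xAAAAAAAB
//   %q = G_LSHR  %q, 1
// which computes floor(x / 3) for every 32-bit x; the trailing G_SELECT only
// matters for a divisor of 1, where the magic algorithm does not apply.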
5275
5276bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
5277  assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5278 Register Dst = MI.getOperand(0).getReg();
5279 Register RHS = MI.getOperand(2).getReg();
5280 LLT DstTy = MRI.getType(Dst);
5281 auto *RHSDef = MRI.getVRegDef(RHS);
5282 if (!isConstantOrConstantVector(*RHSDef, MRI))
5283 return false;
5284
5285 auto &MF = *MI.getMF();
5286 AttributeList Attr = MF.getFunction().getAttributes();
5287 const auto &TLI = getTargetLowering();
5288 LLVMContext &Ctx = MF.getFunction().getContext();
5289 auto &DL = MF.getDataLayout();
5290 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5291 return false;
5292
5293 // Don't do this for minsize because the instruction sequence is usually
5294 // larger.
5295 if (MF.getFunction().hasMinSize())
5296 return false;
5297
5298 // Don't do this if the types are not going to be legal.
5299 if (LI) {
5300 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5301 return false;
5302 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5303 return false;
5304    if (!isLegalOrBeforeLegalizer(
5305            {TargetOpcode::G_ICMP,
5306 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5307 DstTy}}))
5308 return false;
5309 }
5310
5311 return matchUnaryPredicate(
5312 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5313}
5314
5315void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
5316  auto *NewMI = buildUDivUsingMul(MI);
5317 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5318}
5319
5320bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
5321  assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5322 Register Dst = MI.getOperand(0).getReg();
5323 Register RHS = MI.getOperand(2).getReg();
5324 LLT DstTy = MRI.getType(Dst);
5325
5326 auto &MF = *MI.getMF();
5327 AttributeList Attr = MF.getFunction().getAttributes();
5328 const auto &TLI = getTargetLowering();
5329 LLVMContext &Ctx = MF.getFunction().getContext();
5330 auto &DL = MF.getDataLayout();
5331 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5332 return false;
5333
5334 // Don't do this for minsize because the instruction sequence is usually
5335 // larger.
5336 if (MF.getFunction().hasMinSize())
5337 return false;
5338
5339 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5340 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5341 return matchUnaryPredicate(
5342 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5343 }
5344
5345 // Don't support the general case for now.
5346 return false;
5347}
5348
5349void CombinerHelper::applySDivByConst(MachineInstr &MI) {
5350  auto *NewMI = buildSDivUsingMul(MI);
5351 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5352}
5353
5354MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
5355  assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5356 auto &SDiv = cast<GenericMachineInstr>(MI);
5357 Register Dst = SDiv.getReg(0);
5358 Register LHS = SDiv.getReg(1);
5359 Register RHS = SDiv.getReg(2);
5360 LLT Ty = MRI.getType(Dst);
5361 LLT ScalarTy = Ty.getScalarType();
5362  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5363  LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5364 auto &MIB = Builder;
5365
5366 bool UseSRA = false;
5367 SmallVector<Register, 16> Shifts, Factors;
5368
5369 auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5370 bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
5371
5372 auto BuildSDIVPattern = [&](const Constant *C) {
5373 // Don't recompute inverses for each splat element.
5374 if (IsSplat && !Factors.empty()) {
5375 Shifts.push_back(Shifts[0]);
5376 Factors.push_back(Factors[0]);
5377 return true;
5378 }
5379
5380 auto *CI = cast<ConstantInt>(C);
5381 APInt Divisor = CI->getValue();
5382 unsigned Shift = Divisor.countr_zero();
5383 if (Shift) {
5384 Divisor.ashrInPlace(Shift);
5385 UseSRA = true;
5386 }
5387
5388 // Calculate the multiplicative inverse modulo BW.
5389 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5390 APInt Factor = Divisor.multiplicativeInverse();
5391 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5392 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5393 return true;
5394 };
5395
5396 // Collect all magic values from the build vector.
5397 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5398 (void)Matched;
5399 assert(Matched && "Expected unary predicate match to succeed");
5400
5401 Register Shift, Factor;
5402 if (Ty.isVector()) {
5403 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5404 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5405 } else {
5406 Shift = Shifts[0];
5407 Factor = Factors[0];
5408 }
5409
5410 Register Res = LHS;
5411
5412 if (UseSRA)
5413 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5414
5415 return MIB.buildMul(Ty, Res, Factor);
5416}
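// Worked example of the exact-sdiv expansion above (illustrative; assumes a
// 32-bit scalar): for an exact G_SDIV by 6 the divisor splits into 2 * 3, so
// the emitted sequence is
//   %t = G_ASHR %x, 1          (exact shift removes the power-of-two factor)
//   %q = G_MUL  %t, 0xAAAAAAAB
// where 0xAAAAAAAB is the multiplicative inverse of 3 modulo 2^32
// (3 * 0xAAAAAAAB == 1 mod 2^32); this is only correct because the division is
// known to be exact, which is why matchSDivByConst requires the 'exact' flag.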
5417
5418bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) {
5419  assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5420 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5421 "Expected SDIV or UDIV");
5422 auto &Div = cast<GenericMachineInstr>(MI);
5423 Register RHS = Div.getReg(2);
5424 auto MatchPow2 = [&](const Constant *C) {
5425 auto *CI = dyn_cast<ConstantInt>(C);
5426 return CI && (CI->getValue().isPowerOf2() ||
5427 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5428 };
5429 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5430}
5431
5432void CombinerHelper::applySDivByPow2(MachineInstr &MI) {
5433  assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5434 auto &SDiv = cast<GenericMachineInstr>(MI);
5435 Register Dst = SDiv.getReg(0);
5436 Register LHS = SDiv.getReg(1);
5437 Register RHS = SDiv.getReg(2);
5438 LLT Ty = MRI.getType(Dst);
5439  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5440  LLT CCVT =
5441 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5442
5443 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5444 // to the following version:
5445 //
5446 // %c1 = G_CTTZ %rhs
5447 // %inexact = G_SUB $bitwidth, %c1
5448  // %sign = G_ASHR %lhs, $(bitwidth - 1)
5449 // %lshr = G_LSHR %sign, %inexact
5450 // %add = G_ADD %lhs, %lshr
5451 // %ashr = G_ASHR %add, %c1
5452  // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5453 // %zero = G_CONSTANT $0
5454 // %neg = G_NEG %ashr
5455 // %isneg = G_ICMP SLT %rhs, %zero
5456 // %res = G_SELECT %isneg, %neg, %ashr
5457
5458 unsigned BitWidth = Ty.getScalarSizeInBits();
5459 auto Zero = Builder.buildConstant(Ty, 0);
5460
5461 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5462 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5463 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5464 // Splat the sign bit into the register
5465 auto Sign = Builder.buildAShr(
5466 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5467
5468 // Add (LHS < 0) ? abs2 - 1 : 0;
5469 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5470 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5471 auto AShr = Builder.buildAShr(Ty, Add, C1);
5472
5473 // Special case: (sdiv X, 1) -> X
5474 // Special Case: (sdiv X, -1) -> 0-X
5475 auto One = Builder.buildConstant(Ty, 1);
5476 auto MinusOne = Builder.buildConstant(Ty, -1);
5477 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5478 auto IsMinusOne =
5479      Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5480  auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5481 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5482
5483 // If divided by a positive value, we're done. Otherwise, the result must be
5484 // negated.
5485 auto Neg = Builder.buildNeg(Ty, AShr);
5486 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5487 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5488 MI.eraseFromParent();
5489}
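// Worked example of the lowering above (illustrative; s32 dividend, divisor
// 8): for %x = -9 the sequence computes sign = -1, inexact = 32 - 3 = 29,
// lshr(sign, 29) = 7, add = -9 + 7 = -2 and ashr(-2, 3) = -1, matching the
// round-toward-zero result of -9 / 8. The trailing compares only adjust the
// result for divisors of 1 or -1 and negate it for negative divisors.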
5490
5491void CombinerHelper::applyUDivByPow2(MachineInstr &MI) {
5492  assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5493 auto &UDiv = cast<GenericMachineInstr>(MI);
5494 Register Dst = UDiv.getReg(0);
5495 Register LHS = UDiv.getReg(1);
5496 Register RHS = UDiv.getReg(2);
5497 LLT Ty = MRI.getType(Dst);
5498  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5499
5500 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5501 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5502 MI.eraseFromParent();
5503}
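// For example (illustrative): G_UDIV %x, 16 becomes G_LSHR %x, 4, since an
// unsigned divide by a power of two is simply a logical right shift by the
// count of trailing zeros of the divisor (the G_CTTZ above constant-folds).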
5504
5505bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
5506  assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5507 Register RHS = MI.getOperand(2).getReg();
5508 Register Dst = MI.getOperand(0).getReg();
5509 LLT Ty = MRI.getType(Dst);
5510  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5511  auto MatchPow2ExceptOne = [&](const Constant *C) {
5512 if (auto *CI = dyn_cast<ConstantInt>(C))
5513 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5514 return false;
5515 };
5516 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5517 return false;
5518 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
5519}
5520
5521void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
5522  Register LHS = MI.getOperand(1).getReg();
5523 Register RHS = MI.getOperand(2).getReg();
5524 Register Dst = MI.getOperand(0).getReg();
5525 LLT Ty = MRI.getType(Dst);
5526  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5527  unsigned NumEltBits = Ty.getScalarSizeInBits();
5528
5529 auto LogBase2 = buildLogBase2(RHS, Builder);
5530 auto ShiftAmt =
5531 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5532 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5533 Builder.buildLShr(Dst, LHS, Trunc);
5534 MI.eraseFromParent();
5535}
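// Worked example (illustrative, s32): G_UMULH %x, 16 is the high 32 bits of
// the 64-bit product x * 2^4, i.e. x >> (32 - 4), so the code above emits
// G_LSHR %x, 28, with the shift amount computed as NumEltBits - log2(RHS).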
5536
5537bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
5538                                               BuildFnTy &MatchInfo) {
5539 unsigned Opc = MI.getOpcode();
5540 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
5541 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5542 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
5543
5544 Register Dst = MI.getOperand(0).getReg();
5545 Register X = MI.getOperand(1).getReg();
5546 Register Y = MI.getOperand(2).getReg();
5547 LLT Type = MRI.getType(Dst);
5548
5549 // fold (fadd x, fneg(y)) -> (fsub x, y)
5550 // fold (fadd fneg(y), x) -> (fsub x, y)
5551  // G_FADD is commutative so both cases are checked by m_GFAdd
5552 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5553 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
5554 Opc = TargetOpcode::G_FSUB;
5555 }
5556 /// fold (fsub x, fneg(y)) -> (fadd x, y)
5557 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5558 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
5559 Opc = TargetOpcode::G_FADD;
5560 }
5561 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
5562 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
5563 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
5564 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
5565 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5566 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
5567 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
5568 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
5569 // no opcode change
5570 } else
5571 return false;
5572
5573 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5574    Observer.changingInstr(MI);
5575    MI.setDesc(B.getTII().get(Opc));
5576    MI.getOperand(1).setReg(X);
5577    MI.getOperand(2).setReg(Y);
5578    Observer.changedInstr(MI);
5579  };
5580 return true;
5581}
5582
5583bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5584  assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5585
5586 Register LHS = MI.getOperand(1).getReg();
5587 MatchInfo = MI.getOperand(2).getReg();
5588 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
5589
5590 const auto LHSCst = Ty.isVector()
5591 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
5592                           : getFConstantVRegValWithLookThrough(LHS, MRI);
5593  if (!LHSCst)
5594 return false;
5595
5596 // -0.0 is always allowed
5597 if (LHSCst->Value.isNegZero())
5598 return true;
5599
5600 // +0.0 is only allowed if nsz is set.
5601 if (LHSCst->Value.isPosZero())
5602 return MI.getFlag(MachineInstr::FmNsz);
5603
5604 return false;
5605}
5606
5607void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5608  Register Dst = MI.getOperand(0).getReg();
5609  Builder.buildFNeg(
5610      Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
5611 eraseInst(MI);
5612}
5613
5614/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
5615/// due to global flags or MachineInstr flags.
5616static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
5617 if (MI.getOpcode() != TargetOpcode::G_FMUL)
5618 return false;
5619 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
5620}
5621
5622static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
5623 const MachineRegisterInfo &MRI) {
5624 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
5625 MRI.use_instr_nodbg_end()) >
5626 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
5627 MRI.use_instr_nodbg_end());
5628}
5629
5630bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
5631                                         bool &AllowFusionGlobally,
5632 bool &HasFMAD, bool &Aggressive,
5633 bool CanReassociate) {
5634
5635 auto *MF = MI.getMF();
5636 const auto &TLI = *MF->getSubtarget().getTargetLowering();
5637 const TargetOptions &Options = MF->getTarget().Options;
5638 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5639
5640 if (CanReassociate &&
5641 !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
5642 return false;
5643
5644 // Floating-point multiply-add with intermediate rounding.
5645 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
5646 // Floating-point multiply-add without intermediate rounding.
5647 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
5648 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
5649 // No valid opcode, do not combine.
5650 if (!HasFMAD && !HasFMA)
5651 return false;
5652
5653 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
5654 Options.UnsafeFPMath || HasFMAD;
5655 // If the addition is not contractable, do not combine.
5656 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
5657 return false;
5658
5659 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
5660 return true;
5661}
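// In short (descriptive note on the checks above): fusion needs either a legal
// G_FMAD (only after legalization) or a profitable-and-legal G_FMA, plus
// permission to contract, which comes either globally (FPOpFusion::Fast or
// unsafe-fp-math) or from the 'contract' fast-math flag on the instruction
// being combined.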
5662
5663bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
5664    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5665 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5666
5667 bool AllowFusionGlobally, HasFMAD, Aggressive;
5668 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5669 return false;
5670
5671 Register Op1 = MI.getOperand(1).getReg();
5672 Register Op2 = MI.getOperand(2).getReg();
5673  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5674  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5675  unsigned PreferredFusedOpcode =
5676 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5677
5678 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5679 // prefer to fold the multiply with fewer uses.
5680 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5681 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5682 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5683 std::swap(LHS, RHS);
5684 }
5685
5686 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
5687 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5688 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
5689 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5690 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5691 {LHS.MI->getOperand(1).getReg(),
5692 LHS.MI->getOperand(2).getReg(), RHS.Reg});
5693 };
5694 return true;
5695 }
5696
5697 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
5698 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5699 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
5700 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5701 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5702 {RHS.MI->getOperand(1).getReg(),
5703 RHS.MI->getOperand(2).getReg(), LHS.Reg});
5704 };
5705 return true;
5706 }
5707
5708 return false;
5709}
5710
5711bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
5712    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5713 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5714
5715 bool AllowFusionGlobally, HasFMAD, Aggressive;
5716 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5717 return false;
5718
5719 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5720 Register Op1 = MI.getOperand(1).getReg();
5721 Register Op2 = MI.getOperand(2).getReg();
5722  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5723  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5724  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5725
5726 unsigned PreferredFusedOpcode =
5727 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5728
5729 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5730 // prefer to fold the multiply with fewer uses.
5731 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5732 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5733 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5734 std::swap(LHS, RHS);
5735 }
5736
5737 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
5738 MachineInstr *FpExtSrc;
5739 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5740 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5741 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5742 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5743 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5744 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5745 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5746 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5747 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
5748 };
5749 return true;
5750 }
5751
5752 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
5753 // Note: Commutes FADD operands.
5754 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5755 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5756 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5757 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5758 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5759 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5760 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5761 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5762 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
5763 };
5764 return true;
5765 }
5766
5767 return false;
5768}
5769
5770bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
5771    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5772 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5773
5774 bool AllowFusionGlobally, HasFMAD, Aggressive;
5775 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
5776 return false;
5777
5778 Register Op1 = MI.getOperand(1).getReg();
5779 Register Op2 = MI.getOperand(2).getReg();
5780  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5781  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5782  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5783
5784 unsigned PreferredFusedOpcode =
5785 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5786
5787 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5788 // prefer to fold the multiply with fewer uses.
5789 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5790 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5791 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5792 std::swap(LHS, RHS);
5793 }
5794
5795 MachineInstr *FMA = nullptr;
5796 Register Z;
5797 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
5798 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5799 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
5800 TargetOpcode::G_FMUL) &&
5801 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
5802 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
5803 FMA = LHS.MI;
5804 Z = RHS.Reg;
5805 }
5806 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
5807 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5808 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
5809 TargetOpcode::G_FMUL) &&
5810 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
5811 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
5812 Z = LHS.Reg;
5813 FMA = RHS.MI;
5814 }
5815
5816 if (FMA) {
5817 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
5818 Register X = FMA->getOperand(1).getReg();
5819 Register Y = FMA->getOperand(2).getReg();
5820 Register U = FMulMI->getOperand(1).getReg();
5821 Register V = FMulMI->getOperand(2).getReg();
5822
5823 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5824 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
5825 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
5826 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5827 {X, Y, InnerFMA});
5828 };
5829 return true;
5830 }
5831
5832 return false;
5833}
5834
5835bool CombinerHelper::matchCombineFAddFpExtFMAFMulToFMadOrFMAAggressive(
5836    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5837 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5838
5839 bool AllowFusionGlobally, HasFMAD, Aggressive;
5840 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5841 return false;
5842
5843 if (!Aggressive)
5844 return false;
5845
5846 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5847 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5848 Register Op1 = MI.getOperand(1).getReg();
5849 Register Op2 = MI.getOperand(2).getReg();
5850  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5851  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5852
5853 unsigned PreferredFusedOpcode =
5854 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5855
5856 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5857 // prefer to fold the multiply with fewer uses.
5858 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5859 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5860 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5861 std::swap(LHS, RHS);
5862 }
5863
5864 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
5865 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
5866                                 Register Y, MachineIRBuilder &B) {
5867    Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
5868 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
5869 Register InnerFMA =
5870 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
5871 .getReg(0);
5872 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5873 {X, Y, InnerFMA});
5874 };
5875
5876 MachineInstr *FMulMI, *FMAMI;
5877 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
5878 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5879 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5880 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
5881 m_GFPExt(m_MInstr(FMulMI))) &&
5882 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5883 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5884 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5885 MatchInfo = [=](MachineIRBuilder &B) {
5886 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5887 FMulMI->getOperand(2).getReg(), RHS.Reg,
5888 LHS.MI->getOperand(1).getReg(),
5889 LHS.MI->getOperand(2).getReg(), B);
5890 };
5891 return true;
5892 }
5893
5894 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
5895 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5896 // FIXME: This turns two single-precision and one double-precision
5897 // operation into two double-precision operations, which might not be
5898 // interesting for all targets, especially GPUs.
5899 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5900 FMAMI->getOpcode() == PreferredFusedOpcode) {
5901 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5902 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5903 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5904 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5905 MatchInfo = [=](MachineIRBuilder &B) {
5906 Register X = FMAMI->getOperand(1).getReg();
5907 Register Y = FMAMI->getOperand(2).getReg();
5908 X = B.buildFPExt(DstType, X).getReg(0);
5909 Y = B.buildFPExt(DstType, Y).getReg(0);
5910 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5911 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
5912 };
5913
5914 return true;
5915 }
5916 }
5917
5918 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
5919 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5920 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5921 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
5922 m_GFPExt(m_MInstr(FMulMI))) &&
5923 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5924 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5925 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5926 MatchInfo = [=](MachineIRBuilder &B) {
5927 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5928 FMulMI->getOperand(2).getReg(), LHS.Reg,
5929 RHS.MI->getOperand(1).getReg(),
5930 RHS.MI->getOperand(2).getReg(), B);
5931 };
5932 return true;
5933 }
5934
5935 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
5936 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5937 // FIXME: This turns two single-precision and one double-precision
5938 // operation into two double-precision operations, which might not be
5939 // interesting for all targets, especially GPUs.
5940 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5941 FMAMI->getOpcode() == PreferredFusedOpcode) {
5942 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5943 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5944 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5945 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5946 MatchInfo = [=](MachineIRBuilder &B) {
5947 Register X = FMAMI->getOperand(1).getReg();
5948 Register Y = FMAMI->getOperand(2).getReg();
5949 X = B.buildFPExt(DstType, X).getReg(0);
5950 Y = B.buildFPExt(DstType, Y).getReg(0);
5951 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5952 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
5953 };
5954 return true;
5955 }
5956 }
5957
5958 return false;
5959}
5960
5961bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
5962    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5963 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5964
5965 bool AllowFusionGlobally, HasFMAD, Aggressive;
5966 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5967 return false;
5968
5969 Register Op1 = MI.getOperand(1).getReg();
5970 Register Op2 = MI.getOperand(2).getReg();
5971  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5972  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5973  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5974
5975 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5976 // prefer to fold the multiply with fewer uses.
5977 int FirstMulHasFewerUses = true;
5978 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5979 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5980 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5981 FirstMulHasFewerUses = false;
5982
5983 unsigned PreferredFusedOpcode =
5984 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5985
5986 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
5987 if (FirstMulHasFewerUses &&
5988 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5989 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
5990 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5991 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
5992 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5993 {LHS.MI->getOperand(1).getReg(),
5994 LHS.MI->getOperand(2).getReg(), NegZ});
5995 };
5996 return true;
5997 }
5998 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
5999 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6000 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6001 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6002 Register NegY =
6003 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6004 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6005 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6006 };
6007 return true;
6008 }
6009
6010 return false;
6011}
6012
6013bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
6014    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6015 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6016
6017 bool AllowFusionGlobally, HasFMAD, Aggressive;
6018 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6019 return false;
6020
6021 Register LHSReg = MI.getOperand(1).getReg();
6022 Register RHSReg = MI.getOperand(2).getReg();
6023 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6024
6025 unsigned PreferredFusedOpcode =
6026 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6027
6028 MachineInstr *FMulMI;
6029 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6030 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6031 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6032 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6033 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6034 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6035 Register NegX =
6036 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6037 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6038 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6039 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6040 };
6041 return true;
6042 }
6043
6044 // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
6045 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6046 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6047 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6048 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6049 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6050 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6051 {FMulMI->getOperand(1).getReg(),
6052 FMulMI->getOperand(2).getReg(), LHSReg});
6053 };
6054 return true;
6055 }
6056
6057 return false;
6058}
6059
6060bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
6061    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6062 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6063
6064 bool AllowFusionGlobally, HasFMAD, Aggressive;
6065 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6066 return false;
6067
6068 Register LHSReg = MI.getOperand(1).getReg();
6069 Register RHSReg = MI.getOperand(2).getReg();
6070 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6071
6072 unsigned PreferredFusedOpcode =
6073 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6074
6075 MachineInstr *FMulMI;
6076 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6077 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6078 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6079 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6080 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6081 Register FpExtX =
6082 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6083 Register FpExtY =
6084 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6085 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6086 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6087 {FpExtX, FpExtY, NegZ});
6088 };
6089 return true;
6090 }
6091
6092 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6093 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6094 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6095 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6096 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6097 Register FpExtY =
6098 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6099 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6100 Register FpExtZ =
6101 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6102 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6103 {NegY, FpExtZ, LHSReg});
6104 };
6105 return true;
6106 }
6107
6108 return false;
6109}
6110
6111bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
6112    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6113 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6114
6115 bool AllowFusionGlobally, HasFMAD, Aggressive;
6116 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6117 return false;
6118
6119 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6120 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6121 Register LHSReg = MI.getOperand(1).getReg();
6122 Register RHSReg = MI.getOperand(2).getReg();
6123
6124 unsigned PreferredFusedOpcode =
6125 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6126
6127 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6128                            MachineIRBuilder &B) {
6129    Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6130 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6131 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6132 };
6133
6134 MachineInstr *FMulMI;
6135 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6136 // (fneg (fma (fpext x), (fpext y), z))
6137 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6138 // (fneg (fma (fpext x), (fpext y), z))
6139 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6140 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6141 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6142 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6143 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6144 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6145      Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6146      buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6147 FMulMI->getOperand(2).getReg(), RHSReg, B);
6148 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6149 };
6150 return true;
6151 }
6152
6153 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6154 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6155 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6156 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6157 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6158 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6159 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6160 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6161 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6162 FMulMI->getOperand(2).getReg(), LHSReg, B);
6163 };
6164 return true;
6165 }
6166
6167 return false;
6168}
6169
6170bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
6171                                            unsigned &IdxToPropagate) {
6172 bool PropagateNaN;
6173 switch (MI.getOpcode()) {
6174 default:
6175 return false;
6176 case TargetOpcode::G_FMINNUM:
6177 case TargetOpcode::G_FMAXNUM:
6178 PropagateNaN = false;
6179 break;
6180 case TargetOpcode::G_FMINIMUM:
6181 case TargetOpcode::G_FMAXIMUM:
6182 PropagateNaN = true;
6183 break;
6184 }
6185
6186 auto MatchNaN = [&](unsigned Idx) {
6187 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6188 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6189 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6190 return false;
6191 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6192 return true;
6193 };
6194
6195 return MatchNaN(1) || MatchNaN(2);
6196}
6197
6198bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
6199  assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6200 Register LHS = MI.getOperand(1).getReg();
6201 Register RHS = MI.getOperand(2).getReg();
6202
6203 // Helper lambda to check for opportunities for
6204 // A + (B - A) -> B
6205 // (B - A) + A -> B
6206 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6207 Register Reg;
6208 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6209 Reg == MaybeSameReg;
6210 };
6211 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6212}
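// For example (illustrative): with %d = G_SUB %b, %a, both G_ADD %a, %d and
// G_ADD %d, %a fold to a copy of %b, which is the A + (B - A) pattern the
// lambda above checks in both operand orders.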
6213
6214bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
6215                                                  Register &MatchInfo) {
6216 // This combine folds the following patterns:
6217 //
6218 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6219 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6220 // into
6221 // x
6222 // if
6223 // k == sizeof(VecEltTy)/2
6224 // type(x) == type(dst)
6225 //
6226 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6227 // into
6228 // x
6229 // if
6230 // type(x) == type(dst)
6231
6232 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6233 LLT DstEltTy = DstVecTy.getElementType();
6234
6235 Register Lo, Hi;
6236
6237 if (mi_match(
6238 MI, MRI,
6239          m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
6240    MatchInfo = Lo;
6241 return MRI.getType(MatchInfo) == DstVecTy;
6242 }
6243
6244 std::optional<ValueAndVReg> ShiftAmount;
6245 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6246 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6247 if (mi_match(
6248 MI, MRI,
6249 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6250 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6251 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6252 MatchInfo = Lo;
6253 return MRI.getType(MatchInfo) == DstVecTy;
6254 }
6255 }
6256
6257 return false;
6258}
6259
6260bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
6261                                               Register &MatchInfo) {
6262 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6263 // if type(x) == type(G_TRUNC)
6264 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6265 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6266 return false;
6267
6268 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6269}
6270
6271bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
6272                                                   Register &MatchInfo) {
6273 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6274 // y if K == size of vector element type
6275 std::optional<ValueAndVReg> ShiftAmt;
6276 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6277                m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
6278                        m_GCst(ShiftAmt))))
6279 return false;
6280
6281 LLT MatchTy = MRI.getType(MatchInfo);
6282 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6283 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6284}
6285
6286unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6287 CmpInst::Predicate Pred, LLT DstTy,
6288 SelectPatternNaNBehaviour VsNaNRetVal) const {
6289 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6290 "Expected a NaN behaviour?");
6291 // Choose an opcode based off of legality or the behaviour when one of the
6292 // LHS/RHS may be NaN.
6293 switch (Pred) {
6294 default:
6295 return 0;
6296 case CmpInst::FCMP_UGT:
6297 case CmpInst::FCMP_UGE:
6298 case CmpInst::FCMP_OGT:
6299 case CmpInst::FCMP_OGE:
6300 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6301 return TargetOpcode::G_FMAXNUM;
6302 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6303 return TargetOpcode::G_FMAXIMUM;
6304 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6305 return TargetOpcode::G_FMAXNUM;
6306 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6307 return TargetOpcode::G_FMAXIMUM;
6308 return 0;
6309 case CmpInst::FCMP_ULT:
6310 case CmpInst::FCMP_ULE:
6311 case CmpInst::FCMP_OLT:
6312 case CmpInst::FCMP_OLE:
6313 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6314 return TargetOpcode::G_FMINNUM;
6315 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6316 return TargetOpcode::G_FMINIMUM;
6317 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6318 return TargetOpcode::G_FMINNUM;
6319 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6320 return 0;
6321 return TargetOpcode::G_FMINIMUM;
6322 }
6323}
6324
6325CombinerHelper::SelectPatternNaNBehaviour
6326CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6327 bool IsOrderedComparison) const {
6328 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6329 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6330 // Completely unsafe.
6331 if (!LHSSafe && !RHSSafe)
6332 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6333 if (LHSSafe && RHSSafe)
6334 return SelectPatternNaNBehaviour::RETURNS_ANY;
6335 // An ordered comparison will return false when given a NaN, so it
6336 // returns the RHS.
6337 if (IsOrderedComparison)
6338 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6339 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6340 // An unordered comparison will return true when given a NaN, so it
6341 // returns the LHS.
6342 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6343 : SelectPatternNaNBehaviour::RETURNS_NAN;
6344}
6345
6346bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6347 Register TrueVal, Register FalseVal,
6348 BuildFnTy &MatchInfo) {
6349 // Match: select (fcmp cond x, y) x, y
6350 // select (fcmp cond x, y) y, x
6351 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
6352 LLT DstTy = MRI.getType(Dst);
6353 // Bail out early on pointers, since we'll never want to fold to a min/max.
6354 if (DstTy.isPointer())
6355 return false;
6356 // Match a floating point compare with a less-than/greater-than predicate.
6357 // TODO: Allow multiple users of the compare if they are all selects.
6358 CmpInst::Predicate Pred;
6359 Register CmpLHS, CmpRHS;
6360 if (!mi_match(Cond, MRI,
6361                m_OneNonDBGUse(
6362                    m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6363 CmpInst::isEquality(Pred))
6364 return false;
6365 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6366 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6367 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6368 return false;
6369 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6370 std::swap(CmpLHS, CmpRHS);
6371 Pred = CmpInst::getSwappedPredicate(Pred);
6372 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6373 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6374 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6375 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6376 }
6377 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6378 return false;
6379 // Decide what type of max/min this should be based off of the predicate.
6380 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6381 if (!Opc || !isLegal({Opc, {DstTy}}))
6382 return false;
6383 // Comparisons between signed zero and zero may have different results...
6384 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6385 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6386 // We don't know if a comparison between two 0s will give us a consistent
6387 // result. Be conservative and only proceed if at least one side is
6388 // non-zero.
6389 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6390 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6391 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6392 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6393 return false;
6394 }
6395 }
6396 MatchInfo = [=](MachineIRBuilder &B) {
6397 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6398 };
6399 return true;
6400}
6401
6402bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
6403                                                 BuildFnTy &MatchInfo) {
6404 // TODO: Handle integer cases.
6405 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6406 // Condition may be fed by a truncated compare.
6407 Register Cond = MI.getOperand(1).getReg();
6408 Register MaybeTrunc;
6409 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6410 Cond = MaybeTrunc;
6411 Register Dst = MI.getOperand(0).getReg();
6412 Register TrueVal = MI.getOperand(2).getReg();
6413 Register FalseVal = MI.getOperand(3).getReg();
6414 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6415}
6416
6417bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
6418                                                   BuildFnTy &MatchInfo) {
6419 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6420 // (X + Y) == X --> Y == 0
6421 // (X + Y) != X --> Y != 0
6422 // (X - Y) == X --> Y == 0
6423 // (X - Y) != X --> Y != 0
6424 // (X ^ Y) == X --> Y == 0
6425 // (X ^ Y) != X --> Y != 0
6426 Register Dst = MI.getOperand(0).getReg();
6427 CmpInst::Predicate Pred;
6428 Register X, Y, OpLHS, OpRHS;
6429 bool MatchedSub = mi_match(
6430 Dst, MRI,
6431 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6432 if (MatchedSub && X != OpLHS)
6433 return false;
6434 if (!MatchedSub) {
6435 if (!mi_match(Dst, MRI,
6436 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6437 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6438 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6439 return false;
6440 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6441 }
6442 MatchInfo = [=](MachineIRBuilder &B) {
6443 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6444 B.buildICmp(Pred, Dst, Y, Zero);
6445 };
6446 return CmpInst::isEquality(Pred) && Y.isValid();
6447}
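// For example (illustrative): G_ICMP eq, (G_ADD %x, %y), %x is rewritten to
// G_ICMP eq, %y, 0, and the same rewrite applies to the G_SUB and G_XOR forms
// listed above; only equality predicates qualify, which is enforced by the
// final CmpInst::isEquality check.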
6448
6449bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
6450  Register ShiftReg = MI.getOperand(2).getReg();
6451 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
6452 auto IsShiftTooBig = [&](const Constant *C) {
6453 auto *CI = dyn_cast<ConstantInt>(C);
6454 return CI && CI->uge(ResTy.getScalarSizeInBits());
6455 };
6456 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
6457}
6458
6459bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
6460  unsigned LHSOpndIdx = 1;
6461 unsigned RHSOpndIdx = 2;
6462 switch (MI.getOpcode()) {
6463 case TargetOpcode::G_UADDO:
6464 case TargetOpcode::G_SADDO:
6465 case TargetOpcode::G_UMULO:
6466 case TargetOpcode::G_SMULO:
6467 LHSOpndIdx = 2;
6468 RHSOpndIdx = 3;
6469 break;
6470 default:
6471 break;
6472 }
6473 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
6474 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
6475 if (!getIConstantVRegVal(LHS, MRI)) {
6476 // Skip commuting if LHS is not a constant. But, LHS may be a
6477 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
6478 // have a constant on the RHS.
6479 if (MRI.getVRegDef(LHS)->getOpcode() !=
6480 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
6481 return false;
6482 }
6483 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
6484 return MRI.getVRegDef(RHS)->getOpcode() !=
6485 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
6486             !getIConstantVRegVal(RHS, MRI);
6487}
6488
6489bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
6490  Register LHS = MI.getOperand(1).getReg();
6491 Register RHS = MI.getOperand(2).getReg();
6492 std::optional<FPValueAndVReg> ValAndVReg;
6493 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
6494 return false;
6495 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
6496}
6497
6498void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
6499  Observer.changingInstr(MI);
6500  unsigned LHSOpndIdx = 1;
6501 unsigned RHSOpndIdx = 2;
6502 switch (MI.getOpcode()) {
6503 case TargetOpcode::G_UADDO:
6504 case TargetOpcode::G_SADDO:
6505 case TargetOpcode::G_UMULO:
6506 case TargetOpcode::G_SMULO:
6507 LHSOpndIdx = 2;
6508 RHSOpndIdx = 3;
6509 break;
6510 default:
6511 break;
6512 }
6513 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
6514 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
6515 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
6516 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
6517  Observer.changedInstr(MI);
6518}
6519
6520bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
6521 LLT SrcTy = MRI.getType(Src);
6522 if (SrcTy.isFixedVector())
6523 return isConstantSplatVector(Src, 1, AllowUndefs);
6524 if (SrcTy.isScalar()) {
6525 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6526 return true;
6527 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6528 return IConstant && IConstant->Value == 1;
6529 }
6530 return false; // scalable vector
6531}
6532
6533bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
6534 LLT SrcTy = MRI.getType(Src);
6535 if (SrcTy.isFixedVector())
6536 return isConstantSplatVector(Src, 0, AllowUndefs);
6537 if (SrcTy.isScalar()) {
6538 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6539 return true;
6540 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6541 return IConstant && IConstant->Value == 0;
6542 }
6543 return false; // scalable vector
6544}
6545
6546// Ignores COPYs during conformance checks.
6547// FIXME scalable vectors.
6548bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
6549 bool AllowUndefs) {
6550 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6551 if (!BuildVector)
6552 return false;
6553 unsigned NumSources = BuildVector->getNumSources();
6554
6555 for (unsigned I = 0; I < NumSources; ++I) {
6556 GImplicitDef *ImplicitDef =
6557 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
6558 if (ImplicitDef && AllowUndefs)
6559 continue;
6560 if (ImplicitDef && !AllowUndefs)
6561 return false;
6562 std::optional<ValueAndVReg> IConstant =
6563        getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6564    if (IConstant && IConstant->Value == SplatValue)
6565 continue;
6566 return false;
6567 }
6568 return true;
6569}
6570
6571// Ignores COPYs during lookups.
6572// FIXME scalable vectors
6573std::optional<APInt>
6574CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
6575 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6576 if (IConstant)
6577 return IConstant->Value;
6578
6579 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6580 if (!BuildVector)
6581 return std::nullopt;
6582 unsigned NumSources = BuildVector->getNumSources();
6583
6584 std::optional<APInt> Value = std::nullopt;
6585 for (unsigned I = 0; I < NumSources; ++I) {
6586 std::optional<ValueAndVReg> IConstant =
6587        getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6588    if (!IConstant)
6589 return std::nullopt;
6590 if (!Value)
6591 Value = IConstant->Value;
6592 else if (*Value != IConstant->Value)
6593 return std::nullopt;
6594 }
6595 return Value;
6596}
6597
6598// FIXME G_SPLAT_VECTOR
6599bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
6600 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6601 if (IConstant)
6602 return true;
6603
6604 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6605 if (!BuildVector)
6606 return false;
6607
6608 unsigned NumSources = BuildVector->getNumSources();
6609 for (unsigned I = 0; I < NumSources; ++I) {
6610 std::optional<ValueAndVReg> IConstant =
6611        getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6612    if (!IConstant)
6613 return false;
6614 }
6615 return true;
6616}
6617
6618// TODO: use knownbits to determine zeros
6619bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
6620 BuildFnTy &MatchInfo) {
6621 uint32_t Flags = Select->getFlags();
6622 Register Dest = Select->getReg(0);
6623 Register Cond = Select->getCondReg();
6624 Register True = Select->getTrueReg();
6625 Register False = Select->getFalseReg();
6626 LLT CondTy = MRI.getType(Select->getCondReg());
6627 LLT TrueTy = MRI.getType(Select->getTrueReg());
6628
6629 // We only do this combine for scalar boolean conditions.
6630 if (CondTy != LLT::scalar(1))
6631 return false;
6632
6633 if (TrueTy.isPointer())
6634 return false;
6635
6636 // Both are scalars.
6637 std::optional<ValueAndVReg> TrueOpt =
6638      getIConstantVRegValWithLookThrough(True, MRI);
6639  std::optional<ValueAndVReg> FalseOpt =
6640      getIConstantVRegValWithLookThrough(False, MRI);
6641
6642 if (!TrueOpt || !FalseOpt)
6643 return false;
6644
6645 APInt TrueValue = TrueOpt->Value;
6646 APInt FalseValue = FalseOpt->Value;
6647
6648 // select Cond, 1, 0 --> zext (Cond)
6649 if (TrueValue.isOne() && FalseValue.isZero()) {
6650 MatchInfo = [=](MachineIRBuilder &B) {
6651 B.setInstrAndDebugLoc(*Select);
6652 B.buildZExtOrTrunc(Dest, Cond);
6653 };
6654 return true;
6655 }
6656
6657 // select Cond, -1, 0 --> sext (Cond)
6658 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
6659 MatchInfo = [=](MachineIRBuilder &B) {
6660 B.setInstrAndDebugLoc(*Select);
6661 B.buildSExtOrTrunc(Dest, Cond);
6662 };
6663 return true;
6664 }
6665
6666 // select Cond, 0, 1 --> zext (!Cond)
6667 if (TrueValue.isZero() && FalseValue.isOne()) {
6668 MatchInfo = [=](MachineIRBuilder &B) {
6669 B.setInstrAndDebugLoc(*Select);
6670      Register Inner = MRI.createGenericVirtualRegister(CondTy);
6671      B.buildNot(Inner, Cond);
6672 B.buildZExtOrTrunc(Dest, Inner);
6673 };
6674 return true;
6675 }
6676
6677 // select Cond, 0, -1 --> sext (!Cond)
6678 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
6679 MatchInfo = [=](MachineIRBuilder &B) {
6680 B.setInstrAndDebugLoc(*Select);
6681      Register Inner = MRI.createGenericVirtualRegister(CondTy);
6682      B.buildNot(Inner, Cond);
6683 B.buildSExtOrTrunc(Dest, Inner);
6684 };
6685 return true;
6686 }
6687
6688 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6689 if (TrueValue - 1 == FalseValue) {
6690 MatchInfo = [=](MachineIRBuilder &B) {
6691 B.setInstrAndDebugLoc(*Select);
6692      Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6693      B.buildZExtOrTrunc(Inner, Cond);
6694 B.buildAdd(Dest, Inner, False);
6695 };
6696 return true;
6697 }
6698
6699 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6700 if (TrueValue + 1 == FalseValue) {
6701 MatchInfo = [=](MachineIRBuilder &B) {
6702 B.setInstrAndDebugLoc(*Select);
6703      Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6704      B.buildSExtOrTrunc(Inner, Cond);
6705 B.buildAdd(Dest, Inner, False);
6706 };
6707 return true;
6708 }
6709
6710 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
6711 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
6712 MatchInfo = [=](MachineIRBuilder &B) {
6713 B.setInstrAndDebugLoc(*Select);
6714      Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6715      B.buildZExtOrTrunc(Inner, Cond);
6716 // The shift amount must be scalar.
6717 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
6718 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
6719 B.buildShl(Dest, Inner, ShAmtC, Flags);
6720 };
6721 return true;
6722 }
6723 // select Cond, -1, C --> or (sext Cond), C
6724 if (TrueValue.isAllOnes()) {
6725 MatchInfo = [=](MachineIRBuilder &B) {
6726 B.setInstrAndDebugLoc(*Select);
6727      Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6728      B.buildSExtOrTrunc(Inner, Cond);
6729 B.buildOr(Dest, Inner, False, Flags);
6730 };
6731 return true;
6732 }
6733
6734 // select Cond, C, -1 --> or (sext (not Cond)), C
6735 if (FalseValue.isAllOnes()) {
6736 MatchInfo = [=](MachineIRBuilder &B) {
6737 B.setInstrAndDebugLoc(*Select);
6738      Register Not = MRI.createGenericVirtualRegister(CondTy);
6739      B.buildNot(Not, Cond);
6740      Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6741      B.buildSExtOrTrunc(Inner, Not);
6742 B.buildOr(Dest, Inner, True, Flags);
6743 };
6744 return true;
6745 }
6746
6747 return false;
6748}
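// Worked examples of the constant-select folds above (illustrative, s32):
//   select %c, 7, 6  --> G_ADD (zext %c), 6     (true value == false value + 1)
//   select %c, 8, 0  --> G_SHL (zext %c), 3     (true value is a power of two)
//   select %c, -1, 5 --> G_OR  (sext %c), 5     (true value is all ones)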
6749
6750// TODO: use knownbits to determine zeros
6751bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
6752 BuildFnTy &MatchInfo) {
6753 uint32_t Flags = Select->getFlags();
6754 Register DstReg = Select->getReg(0);
6755 Register Cond = Select->getCondReg();
6756 Register True = Select->getTrueReg();
6757 Register False = Select->getFalseReg();
6758 LLT CondTy = MRI.getType(Select->getCondReg());
6759 LLT TrueTy = MRI.getType(Select->getTrueReg());
6760
6761 // Boolean or fixed vector of booleans.
6762 if (CondTy.isScalableVector() ||
6763 (CondTy.isFixedVector() &&
6764 CondTy.getElementType().getScalarSizeInBits() != 1) ||
6765 CondTy.getScalarSizeInBits() != 1)
6766 return false;
6767
6768 if (CondTy != TrueTy)
6769 return false;
6770
6771 // select Cond, Cond, F --> or Cond, F
6772 // select Cond, 1, F --> or Cond, F
6773 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
6774 MatchInfo = [=](MachineIRBuilder &B) {
6775 B.setInstrAndDebugLoc(*Select);
6776 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6777 B.buildZExtOrTrunc(Ext, Cond);
6778 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6779 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
6780 };
6781 return true;
6782 }
6783
6784 // select Cond, T, Cond --> and Cond, T
6785 // select Cond, T, 0 --> and Cond, T
6786 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
6787 MatchInfo = [=](MachineIRBuilder &B) {
6788 B.setInstrAndDebugLoc(*Select);
6789 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6790 B.buildZExtOrTrunc(Ext, Cond);
6791 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6792 B.buildAnd(DstReg, Ext, FreezeTrue);
6793 };
6794 return true;
6795 }
6796
6797 // select Cond, T, 1 --> or (not Cond), T
6798 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
6799 MatchInfo = [=](MachineIRBuilder &B) {
6800 B.setInstrAndDebugLoc(*Select);
6801 // First the not.
6802 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6803 B.buildNot(Inner, Cond);
6804 // Then an ext to match the destination register.
6805 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6806 B.buildZExtOrTrunc(Ext, Inner);
6807 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6808 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
6809 };
6810 return true;
6811 }
6812
6813 // select Cond, 0, F --> and (not Cond), F
6814 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
6815 MatchInfo = [=](MachineIRBuilder &B) {
6816 B.setInstrAndDebugLoc(*Select);
6817 // First the not.
6818 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6819 B.buildNot(Inner, Cond);
6820 // Then an ext to match the destination register.
6821 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6822 B.buildZExtOrTrunc(Ext, Inner);
6823 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6824 B.buildAnd(DstReg, Ext, FreezeFalse);
6825 };
6826 return true;
6827 }
6828
6829 return false;
6830}
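// Illustrative example (hypothetical MIR, for exposition only): for boolean
// operands of the same type as the condition, e.g.
//   %d:_(s1) = G_SELECT %c:_(s1), %t:_(s1), %zero:_(s1)  ; %zero = G_CONSTANT i1 0
// the second pattern above emits a zext/trunc of %c (a plain copy here, since
// the types already match), a G_FREEZE of %t, and a G_AND of the two; the
// freeze keeps a poison %t from leaking further than the original select
// would allow.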
6831
6832 bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
6833 BuildFnTy &MatchInfo) {
6834 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
6835 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
6836
6837 Register DstReg = Select->getReg(0);
6838 Register True = Select->getTrueReg();
6839 Register False = Select->getFalseReg();
6840 LLT DstTy = MRI.getType(DstReg);
6841
6842 if (DstTy.isPointer())
6843 return false;
6844
6845 // We want to fold the icmp and replace the select.
6846 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
6847 return false;
6848
6849 CmpInst::Predicate Pred = Cmp->getCond();
6850 // We need a larger or smaller predicate for
6851 // canonicalization.
6852 if (CmpInst::isEquality(Pred))
6853 return false;
6854
6855 Register CmpLHS = Cmp->getLHSReg();
6856 Register CmpRHS = Cmp->getRHSReg();
6857
6858 // We can swap CmpLHS and CmpRHS for a higher hit rate.
6859 if (True == CmpRHS && False == CmpLHS) {
6860 std::swap(CmpLHS, CmpRHS);
6861 Pred = CmpInst::getSwappedPredicate(Pred);
6862 }
6863
6864 // (icmp X, Y) ? X : Y -> integer minmax.
6865 // see matchSelectPattern in ValueTracking.
6866 // Legality between G_SELECT and integer minmax can differ.
6867 if (True != CmpLHS || False != CmpRHS)
6868 return false;
6869
6870 switch (Pred) {
6871 case ICmpInst::ICMP_UGT:
6872 case ICmpInst::ICMP_UGE: {
6873 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
6874 return false;
6875 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
6876 return true;
6877 }
6878 case ICmpInst::ICMP_SGT:
6879 case ICmpInst::ICMP_SGE: {
6880 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
6881 return false;
6882 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
6883 return true;
6884 }
6885 case ICmpInst::ICMP_ULT:
6886 case ICmpInst::ICMP_ULE: {
6887 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
6888 return false;
6889 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
6890 return true;
6891 }
6892 case ICmpInst::ICMP_SLT:
6893 case ICmpInst::ICMP_SLE: {
6894 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
6895 return false;
6896 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
6897 return true;
6898 }
6899 default:
6900 return false;
6901 }
6902}
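// Illustrative example (hypothetical MIR, for exposition only): the classic
// min idiom
//   %c:_(s1)  = G_ICMP intpred(ult), %x:_(s32), %y:_(s32)
//   %d:_(s32) = G_SELECT %c, %x, %y
// matches the ICMP_ULT case and, if G_UMIN is legal (or we are before the
// legalizer), is rewritten to
//   %d:_(s32) = G_UMIN %x, %y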
6903
6904 bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
6905 GSelect *Select = cast<GSelect>(&MI);
6906
6907 if (tryFoldSelectOfConstants(Select, MatchInfo))
6908 return true;
6909
6910 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
6911 return true;
6912
6913 return false;
6914}
6915
6916/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
6917/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
6918/// into a single comparison using range-based reasoning.
6919/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
6920bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
6921 BuildFnTy &MatchInfo) {
6922 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6923 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6924 Register DstReg = Logic->getReg(0);
6925 Register LHS = Logic->getLHSReg();
6926 Register RHS = Logic->getRHSReg();
6927 unsigned Flags = Logic->getFlags();
6928
6929 // We need a G_ICMP on the LHS register.
6930 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
6931 if (!Cmp1)
6932 return false;
6933
6934 // We need a G_ICMP on the RHS register.
6935 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
6936 if (!Cmp2)
6937 return false;
6938
6939 // We want to fold the icmps.
6940 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
6941 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
6942 return false;
6943
6944 APInt C1;
6945 APInt C2;
6946 std::optional<ValueAndVReg> MaybeC1 =
6947 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
6948 if (!MaybeC1)
6949 return false;
6950 C1 = MaybeC1->Value;
6951
6952 std::optional<ValueAndVReg> MaybeC2 =
6953 getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
6954 if (!MaybeC2)
6955 return false;
6956 C2 = MaybeC2->Value;
6957
6958 Register R1 = Cmp1->getLHSReg();
6959 Register R2 = Cmp2->getLHSReg();
6960 CmpInst::Predicate Pred1 = Cmp1->getCond();
6961 CmpInst::Predicate Pred2 = Cmp2->getCond();
6962 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
6963 LLT CmpOperandTy = MRI.getType(R1);
6964
6965 if (CmpOperandTy.isPointer())
6966 return false;
6967
6968 // We build ands, adds, and constants of type CmpOperandTy.
6969 // They must be legal to build.
6970 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
6971 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
6972 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
6973 return false;
6974
6975 // Look through an add of a constant offset on R1, R2, or both operands.
6976 // This lets us turn the R + C' < C'' range idiom into a proper range.
6977 std::optional<APInt> Offset1;
6978 std::optional<APInt> Offset2;
6979 if (R1 != R2) {
6980 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
6981 std::optional<ValueAndVReg> MaybeOffset1 =
6982 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6983 if (MaybeOffset1) {
6984 R1 = Add->getLHSReg();
6985 Offset1 = MaybeOffset1->Value;
6986 }
6987 }
6988 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
6989 std::optional<ValueAndVReg> MaybeOffset2 =
6990 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6991 if (MaybeOffset2) {
6992 R2 = Add->getLHSReg();
6993 Offset2 = MaybeOffset2->Value;
6994 }
6995 }
6996 }
6997
6998 if (R1 != R2)
6999 return false;
7000
7001 // We calculate the icmp ranges including maybe offsets.
7002 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7003 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7004 if (Offset1)
7005 CR1 = CR1.subtract(*Offset1);
7006
7007 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7008 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7009 if (Offset2)
7010 CR2 = CR2.subtract(*Offset2);
7011
7012 bool CreateMask = false;
7013 APInt LowerDiff;
7014 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7015 if (!CR) {
7016 // We need non-wrapping ranges.
7017 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7018 return false;
7019
7020 // Check whether we have equal-size ranges that only differ by one bit.
7021 // In that case we can apply a mask to map one range onto the other.
7022 LowerDiff = CR1.getLower() ^ CR2.getLower();
7023 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7024 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7025 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7026 CR1Size != CR2.getUpper() - CR2.getLower())
7027 return false;
7028
7029 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7030 CreateMask = true;
7031 }
7032
7033 if (IsAnd)
7034 CR = CR->inverse();
7035
7036 CmpInst::Predicate NewPred;
7037 APInt NewC, Offset;
7038 CR->getEquivalentICmp(NewPred, NewC, Offset);
7039
7040 // We take the result type of one of the original icmps, CmpTy, for
7041 // the icmp to be built. The operand type, CmpOperandTy, is used for
7042 // the other instructions and constants to be built. The parameter and
7043 // result types are the same for add and and. CmpTy and the type of
7044 // DstReg might differ, which is why we zext or trunc the icmp result
7045 // into the destination register.
7046
7047 MatchInfo = [=](MachineIRBuilder &B) {
7048 if (CreateMask && Offset != 0) {
7049 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7050 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7051 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7052 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7053 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7054 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7055 B.buildZExtOrTrunc(DstReg, ICmp);
7056 } else if (CreateMask && Offset == 0) {
7057 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7058 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7059 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7060 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7061 B.buildZExtOrTrunc(DstReg, ICmp);
7062 } else if (!CreateMask && Offset != 0) {
7063 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7064 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7065 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7066 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7067 B.buildZExtOrTrunc(DstReg, ICmp);
7068 } else if (!CreateMask && Offset == 0) {
7069 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7070 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7071 B.buildZExtOrTrunc(DstReg, ICmp);
7072 } else {
7073 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7074 }
7075 };
7076 return true;
7077}
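// Illustrative example (hypothetical MIR, for exposition only): the signed
// range check
//   %a:_(s1) = G_ICMP intpred(sge), %x:_(s32), %c0   ; %c0  = G_CONSTANT i32 0
//   %b:_(s1) = G_ICMP intpred(slt), %x:_(s32), %c10  ; %c10 = G_CONSTANT i32 10
//   %d:_(s1) = G_AND %a, %b
// has ranges whose union is exact, so the whole sequence collapses into a
// single unsigned compare:
//   %d:_(s1) = G_ICMP intpred(ult), %x:_(s32), %c10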
7078
7079bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7080 BuildFnTy &MatchInfo) {
7081 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7082 Register DestReg = Logic->getReg(0);
7083 Register LHS = Logic->getLHSReg();
7084 Register RHS = Logic->getRHSReg();
7085 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7086
7087 // We need a compare on the LHS register.
7088 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7089 if (!Cmp1)
7090 return false;
7091
7092 // We need a compare on the RHS register.
7093 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7094 if (!Cmp2)
7095 return false;
7096
7097 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7098 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7099
7100 // We build one fcmp, want to fold the fcmps, replace the logic op,
7101 // and the fcmps must have the same shape.
7102 if (!isLegalOrBeforeLegalizer(
7103 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7104 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7105 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7106 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7107 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7108 return false;
7109
7110 CmpInst::Predicate PredL = Cmp1->getCond();
7111 CmpInst::Predicate PredR = Cmp2->getCond();
7112 Register LHS0 = Cmp1->getLHSReg();
7113 Register LHS1 = Cmp1->getRHSReg();
7114 Register RHS0 = Cmp2->getLHSReg();
7115 Register RHS1 = Cmp2->getRHSReg();
7116
7117 if (LHS0 == RHS1 && LHS1 == RHS0) {
7118 // Swap RHS operands to match LHS.
7119 PredR = CmpInst::getSwappedPredicate(PredR);
7120 std::swap(RHS0, RHS1);
7121 }
7122
7123 if (LHS0 == RHS0 && LHS1 == RHS1) {
7124 // We determine the new predicate.
7125 unsigned CmpCodeL = getFCmpCode(PredL);
7126 unsigned CmpCodeR = getFCmpCode(PredR);
7127 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7128 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7129 MatchInfo = [=](MachineIRBuilder &B) {
7130 // The fcmp predicates fill the lower part of the enum.
7131 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7132 if (Pred == FCmpInst::FCMP_FALSE &&
7133 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7134 auto False = B.buildConstant(CmpTy, 0);
7135 B.buildZExtOrTrunc(DestReg, False);
7136 } else if (Pred == FCmpInst::FCMP_TRUE &&
7137 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7138 auto True =
7139 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7140 CmpTy.isVector() /*isVector*/,
7141 true /*isFP*/));
7142 B.buildZExtOrTrunc(DestReg, True);
7143 } else { // We take the predicate without predicate optimizations.
7144 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7145 B.buildZExtOrTrunc(DestReg, Cmp);
7146 }
7147 };
7148 return true;
7149 }
7150
7151 return false;
7152}
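// Illustrative example (hypothetical MIR, for exposition only): when both
// fcmps test the same operands,
//   %a:_(s1) = G_FCMP floatpred(olt), %x, %y
//   %b:_(s1) = G_FCMP floatpred(oeq), %x, %y
//   %d:_(s1) = G_OR %a, %b
// the predicate codes are or'ed (olt | oeq == ole) and the pair collapses into
//   %d:_(s1) = G_FCMP floatpred(ole), %x, %y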
7153
7154 bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
7155 GAnd *And = cast<GAnd>(&MI);
7156
7157 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7158 return true;
7159
7160 if (tryFoldLogicOfFCmps(And, MatchInfo))
7161 return true;
7162
7163 return false;
7164}
7165
7166 bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
7167 GOr *Or = cast<GOr>(&MI);
7168
7169 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7170 return true;
7171
7172 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7173 return true;
7174
7175 return false;
7176}
7177
7178 bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
7179 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7180
7181 // Addo has no flags
7182 Register Dst = Add->getReg(0);
7183 Register Carry = Add->getReg(1);
7184 Register LHS = Add->getLHSReg();
7185 Register RHS = Add->getRHSReg();
7186 bool IsSigned = Add->isSigned();
7187 LLT DstTy = MRI.getType(Dst);
7188 LLT CarryTy = MRI.getType(Carry);
7189
7190 // Fold addo, if the carry is dead -> add, undef.
7191 if (MRI.use_nodbg_empty(Carry) &&
7192 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7193 MatchInfo = [=](MachineIRBuilder &B) {
7194 B.buildAdd(Dst, LHS, RHS);
7195 B.buildUndef(Carry);
7196 };
7197 return true;
7198 }
7199
7200 // Canonicalize constant to RHS.
7201 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7202 if (IsSigned) {
7203 MatchInfo = [=](MachineIRBuilder &B) {
7204 B.buildSAddo(Dst, Carry, RHS, LHS);
7205 };
7206 return true;
7207 }
7208 // !IsSigned
7209 MatchInfo = [=](MachineIRBuilder &B) {
7210 B.buildUAddo(Dst, Carry, RHS, LHS);
7211 };
7212 return true;
7213 }
7214
7215 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7216 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7217
7218 // Fold addo(c1, c2) -> c3, carry.
7219 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7220 isConstantLegalOrBeforeLegalizer(CarryTy)) {
7221 bool Overflow;
7222 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7223 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7224 MatchInfo = [=](MachineIRBuilder &B) {
7225 B.buildConstant(Dst, Result);
7226 B.buildConstant(Carry, Overflow);
7227 };
7228 return true;
7229 }
7230
7231 // Fold (addo x, 0) -> x, no carry
7232 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7233 MatchInfo = [=](MachineIRBuilder &B) {
7234 B.buildCopy(Dst, LHS);
7235 B.buildConstant(Carry, 0);
7236 };
7237 return true;
7238 }
7239
7240 // Given 2 constant operands whose sum does not overflow:
7241 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7242 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7243 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7244 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7245 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7246 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7247 std::optional<APInt> MaybeAddRHS =
7248 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7249 if (MaybeAddRHS) {
7250 bool Overflow;
7251 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7252 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7253 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7254 if (IsSigned) {
7255 MatchInfo = [=](MachineIRBuilder &B) {
7256 auto ConstRHS = B.buildConstant(DstTy, NewC);
7257 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7258 };
7259 return true;
7260 }
7261 // !IsSigned
7262 MatchInfo = [=](MachineIRBuilder &B) {
7263 auto ConstRHS = B.buildConstant(DstTy, NewC);
7264 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7265 };
7266 return true;
7267 }
7268 }
7269 };
7270
7271 // We try to combine addo to non-overflowing add.
7272 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7273 !isConstantLegalOrBeforeLegalizer(CarryTy))
7274 return false;
7275
7276 // We try to combine uaddo to non-overflowing add.
7277 if (!IsSigned) {
7278 ConstantRange CRLHS =
7279 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
7280 ConstantRange CRRHS =
7281 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
7282
7283 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7284 case ConstantRange::OverflowResult::MayOverflow:
7285 return false;
7286 case ConstantRange::OverflowResult::NeverOverflows: {
7287 MatchInfo = [=](MachineIRBuilder &B) {
7288 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7289 B.buildConstant(Carry, 0);
7290 };
7291 return true;
7292 }
7293 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7294 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7295 MatchInfo = [=](MachineIRBuilder &B) {
7296 B.buildAdd(Dst, LHS, RHS);
7297 B.buildConstant(Carry, 1);
7298 };
7299 return true;
7300 }
7301 }
7302 return false;
7303 }
7304
7305 // We try to combine saddo to non-overflowing add.
7306
7307 // If LHS and RHS each have at least two sign bits, then there is no signed
7308 // overflow.
7309 if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
7310 MatchInfo = [=](MachineIRBuilder &B) {
7311 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7312 B.buildConstant(Carry, 0);
7313 };
7314 return true;
7315 }
7316
7317 ConstantRange CRLHS =
7318 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
7319 ConstantRange CRRHS =
7320 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
7321
7322 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7323 case ConstantRange::OverflowResult::MayOverflow:
7324 return false;
7325 case ConstantRange::OverflowResult::NeverOverflows: {
7326 MatchInfo = [=](MachineIRBuilder &B) {
7327 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7328 B.buildConstant(Carry, 0);
7329 };
7330 return true;
7331 }
7332 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7333 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7334 MatchInfo = [=](MachineIRBuilder &B) {
7335 B.buildAdd(Dst, LHS, RHS);
7336 B.buildConstant(Carry, 1);
7337 };
7338 return true;
7339 }
7340 }
7341
7342 return false;
7343}
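// Illustrative example (hypothetical MIR, for exposition only): if known bits
// prove that both operands of
//   %d:_(s32), %carry:_(s1) = G_UADDO %x, %y
// have their top bit clear, unsignedAddMayOverflow reports NeverOverflows and
// the addo is rewritten to
//   %d:_(s32) = nuw G_ADD %x, %y
//   %carry:_(s1) = G_CONSTANT i1 0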
7344
7345 void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
7346 BuildFnTy &MatchInfo) {
7347 MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
7348 MatchInfo(Builder);
7349 Root->eraseFromParent();
7350}
7351
7352 bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) {
7353 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
7354 return getTargetLowering().isBeneficialToExpandPowI(Exponent, OptForSize);
7355}
7356
7357 void CombinerHelper::applyExpandFPowI(MachineInstr &MI, int64_t Exponent) {
7358 auto [Dst, Base] = MI.getFirst2Regs();
7359 LLT Ty = MRI.getType(Dst);
7360 int64_t ExpVal = Exponent;
7361
7362 if (ExpVal == 0) {
7363 Builder.buildFConstant(Dst, 1.0);
7364 MI.removeFromParent();
7365 return;
7366 }
7367
7368 if (ExpVal < 0)
7369 ExpVal = -ExpVal;
7370
7371 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
7372 // to generate the multiply sequence. There are more optimal ways to do this
7373 // (for example, powi(x,15) generates one more multiply than it should), but
7374 // this has the benefit of being both really simple and much better than a
7375 // libcall.
7376 std::optional<SrcOp> Res;
7377 SrcOp CurSquare = Base;
7378 while (ExpVal > 0) {
7379 if (ExpVal & 1) {
7380 if (!Res)
7381 Res = CurSquare;
7382 else
7383 Res = Builder.buildFMul(Ty, *Res, CurSquare);
7384 }
7385
7386 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
7387 ExpVal >>= 1;
7388 }
7389
7390 // If the original exponent was negative, invert the result, producing
7391 // 1/(x*x*x).
7392 if (Exponent < 0)
7393 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
7394 MI.getFlags());
7395
7396 Builder.buildCopy(Dst, *Res);
7397 MI.eraseFromParent();
7398}
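// Illustrative example (for exposition only): with Exponent = 13 (binary 1101)
// the loop above keeps squaring the base into x^2, x^4, x^8 and multiplies the
// squares selected by the set bits into the running result, so the expansion
// is roughly
//   %res = G_FMUL (G_FMUL %x, %x4), %x8      ; x^13 = x * x^4 * x^8
// and a negative exponent finishes with a G_FDIV that divides 1.0 by that
// product.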
7399
7400 bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO,
7401 BuildFnTy &MatchInfo) {
7402 GSext *Sext = cast<GSext>(getDefIgnoringCopies(MO.getReg(), MRI));
7403 GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Sext->getSrcReg(), MRI));
7404
7405 Register Dst = Sext->getReg(0);
7406 Register Src = Trunc->getSrcReg();
7407
7408 LLT DstTy = MRI.getType(Dst);
7409 LLT SrcTy = MRI.getType(Src);
7410
7411 if (DstTy == SrcTy) {
7412 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
7413 return true;
7414 }
7415
7416 if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
7417 isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
7418 MatchInfo = [=](MachineIRBuilder &B) {
7419 B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoSWrap);
7420 };
7421 return true;
7422 }
7423
7424 if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
7425 isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT, {DstTy, SrcTy}})) {
7426 MatchInfo = [=](MachineIRBuilder &B) { B.buildSExt(Dst, Src); };
7427 return true;
7428 }
7429
7430 return false;
7431}
7432
7433 bool CombinerHelper::matchZextOfTrunc(const MachineOperand &MO,
7434 BuildFnTy &MatchInfo) {
7435 GZext *Zext = cast<GZext>(getDefIgnoringCopies(MO.getReg(), MRI));
7436 GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Zext->getSrcReg(), MRI));
7437
7438 Register Dst = Zext->getReg(0);
7439 Register Src = Trunc->getSrcReg();
7440
7441 LLT DstTy = MRI.getType(Dst);
7442 LLT SrcTy = MRI.getType(Src);
7443
7444 if (DstTy == SrcTy) {
7445 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
7446 return true;
7447 }
7448
7449 if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
7450 isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
7451 MatchInfo = [=](MachineIRBuilder &B) {
7452 B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoUWrap);
7453 };
7454 return true;
7455 }
7456
7457 if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
7458 isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}})) {
7459 MatchInfo = [=](MachineIRBuilder &B) {
7460 B.buildZExt(Dst, Src, MachineInstr::MIFlag::NonNeg);
7461 };
7462 return true;
7463 }
7464
7465 return false;
7466}
7467
7468 bool CombinerHelper::matchNonNegZext(const MachineOperand &MO,
7469 BuildFnTy &MatchInfo) {
7470 GZext *Zext = cast<GZext>(MRI.getVRegDef(MO.getReg()));
7471
7472 Register Dst = Zext->getReg(0);
7473 Register Src = Zext->getSrcReg();
7474
7475 LLT DstTy = MRI.getType(Dst);
7476 LLT SrcTy = MRI.getType(Src);
7477 const auto &TLI = getTargetLowering();
7478
7479 // Convert zext nneg to sext if sext is the preferred form for the target.
7480 if (isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT, {DstTy, SrcTy}}) &&
7481 TLI.isSExtCheaperThanZExt(getMVTForLLT(SrcTy), getMVTForLLT(DstTy))) {
7482 MatchInfo = [=](MachineIRBuilder &B) { B.buildSExt(Dst, Src); };
7483 return true;
7484 }
7485
7486 return false;
7487}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition: APFloat.h:1356
bool isNaN() const
Definition: APFloat.h:1346
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1140
APInt bitcastToAPInt() const
Definition: APFloat.h:1254
Class for arbitrary precision integers.
Definition: APInt.h:77
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1499
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:350
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1161
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:359
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1447
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1090
int32_t exactLogBase2() const
Definition: APInt.h:1740
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:813
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1597
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1556
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
unsigned countl_one() const
Count the number of leading one bits.
Definition: APInt.h:1573
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMask(unsigned numBits) const
Definition: APInt.h:467
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:419
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:179
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:368
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:218
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1521
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:830
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1614
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1200
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition: InstrTypes.h:997
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:774
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:787
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:772
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:761
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:769
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:764
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:785
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:783
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:770
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:759
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:909
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
static bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyUDivByConst(MachineInstr &MI)
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops)
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
bool matchPtrAddZero(MachineInstr &MI)
}
bool matchAllExplicitUsesAreUndef(MachineInstr &MI)
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx)
Delete MI and replace all of its uses with its OpIdx-th operand.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUDivByConst(MachineInstr &MI)
Combine G_UDIV by constant into a multiply by magic constant.
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI)
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchShiftsTooBig(MachineInstr &MI)
Match shifts greater or equal to the bitwidth of the operation.
bool tryCombineCopy(MachineInstr &MI)
If MI is COPY, try to combine it.
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
bool matchUndefStore(MachineInstr &MI)
Return true if a G_STORE instruction MI is storing an undef value.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchRedundantSExtInReg(MachineInstr &MI)
bool matchSextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine sext of trunc.
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo)
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent)
Match FPOWI if it's safe to extend it into a series of multiplications.
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo)
Do constant FP folding when opportunities are exposed after MIR building.
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI)
void applyCommuteBinOpOperands(MachineInstr &MI)
bool matchBinOpSameVal(MachineInstr &MI)
Optimize (x op x) -> x.
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineCopy(MachineInstr &MI)
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx)
Return true if a G_SELECT instruction MI has a constant comparison.
void eraseInst(MachineInstr &MI)
Erase MI.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchAddSubSameReg(MachineInstr &MI, Register &Src)
Transform G_ADD(x, G_SUB(y, x)) to y.
void applyRotateOutOfRange(MachineInstr &MI)
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchRotateOutOfRange(MachineInstr &MI)
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops)
Replace MI with a concat_vectors with Ops.
const TargetLowering & getTargetLowering() const
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
void applyPtrAddZero(MachineInstr &MI)
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
void setRegBank(Register Reg, const RegisterBank *RegBank)
Set the register bank of Reg.
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement)
void replaceInstWithConstant(MachineInstr &MI, int64_t C)
Replace an instruction with a G_CONSTANT with value C.
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
Match ashr (shl x, C), C -> sext_inreg (C)
bool tryCombineExtendingLoads(MachineInstr &MI)
If MI is extend that consumes the result of a load, try to combine it.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applySDivByConst(MachineInstr &MI)
bool matchUndefSelectCmp(MachineInstr &MI)
Return true if a G_SELECT instruction MI has an undef comparison.
void replaceInstWithUndef(MachineInstr &MI)
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantOr(MachineInstr &MI, Register &Replacement)
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is undef.
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void replaceInstWithFConstant(MachineInstr &MI, double C)
Replace an instruction with a G_FCONSTANT with value C.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2)
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
Fold (shift (shift base, x), y) -> (shift base (x+y))
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*MULO x, 0) -> 0 + no carry out.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement)
Delete MI and replace all of its uses with Replacement.
bool matchFunnelShiftToRotate(MachineInstr &MI)
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
Combine inverting a result of a compare into the opposite cond code.
void applyCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is known to be a power of 2.
void applyCombineCopy(MachineInstr &MI)
void applyCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
bool matchAnyExplicitUseIsUndef(MachineInstr &MI)
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
bool matchSextTruncSextLoad(MachineInstr &MI)
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
GISelKnownBits * KB
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
MachineInstr * buildSDivUsingMul(MachineInstr &MI)
Given an G_SDIV MI expressing a signed divide by constant, return an expression that implements it by...
void applySDivByPow2(MachineInstr &MI)
void applyFunnelShiftConstantModulo(MachineInstr &MI)
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool isPreLegalize() const
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo)
Match (and (load x), mask) -> zextload x.
bool matchConstantOp(const MachineOperand &MOP, int64_t C)
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ands.
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg)
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool matchConstantFPOp(const MachineOperand &MOP, double C)
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
Return true if MI is a G_ADD which can be simplified to a G_SUB.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Optimize memcpy intrinsics et al, e.g.
bool matchSelectSameVal(MachineInstr &MI)
Optimize (cond ? x : x) -> x.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst)
Transform fp_instr(cst) to constant result of the fp operation.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo)
Try to reassociate to reassociate operands of a commutative binop.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool tryEmitMemcpyInline(MachineInstr &MI)
Emit loads and stores that perform the given memcpy.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info)
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData)
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo)
Constant fold G_FMA/G_FMAD.
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent)
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
bool isLegal(const LegalityQuery &Query) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine selects.
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo)
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg)
Transform anyext(trunc(x)) to x.
void applySimplifyURemByPow2(MachineInstr &MI)
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
MachineRegisterInfo & MRI
void applyUMulHToLShr(MachineInstr &MI)
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo)
Match expression trees of the form.
bool matchShuffleToExtract(MachineInstr &MI)
bool matchUndefShuffleVectorMask(MachineInstr &MI)
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
Transform a multiply by a power-of-2 value to a left shift.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo)
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo)
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo)
Fold away a merge of an unmerge of the corresponding values.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI)
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx)
Checks if constant at ConstIdx is larger than MI 's bitwidth.
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
bool matchCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchDivByPow2(MachineInstr &MI, bool IsSigned)
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchUMulHToLShr(MachineInstr &MI)
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI dominates UseMI.
MachineInstr * buildUDivUsingMul(MachineInstr &MI)
Given an G_UDIV MI expressing a divide by constant, return an expression that implements it by multip...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg)
Transform zext(trunc(x)) to x.
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData)
bool matchNonNegZext(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine zext nneg to sext.
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false)
const LegalizerInfo * LI
bool matchZextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine zext of trunc.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
void applyShuffleToExtract(MachineInstr &MI)
MachineDominatorTree * MDT
bool matchSDivByConst(MachineInstr &MI)
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
const RegisterBankInfo * RBI
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
const TargetRegisterInfo * TRI
bool tryCombineShuffleVector(MachineInstr &MI)
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg)
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo)
GISelChangeObserver & Observer
bool matchCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Transform [asz]ext([asz]ext(x)) to [asz]ext x.
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Match sext_inreg(load p), imm -> sextload p.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ors.
void applyFunnelShiftToRotate(MachineInstr &MI)
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine addos.
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg)
Transform PtrToInt(IntToPtr(x)) to x.
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal)
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchCommuteConstantToRHS(MachineInstr &MI)
Match constant LHS ops that should be commuted.
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Replace MI with a series of instructions described in MatchInfo.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
MachineIRBuilder & Builder
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine select to integer min/max.
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (and x, n), k -> ubfx x, pos, width.
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate commutative binary operations like G_ADD.
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo)
Push a binary operator through a select on constants.
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is zero.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyUDivByPow2(MachineInstr &MI)
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
void applySextTruncSextLoad(MachineInstr &MI)
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
bool matchCommuteFPConstantToRHS(MachineInstr &MI)
Match constant LHS FP ops that should be commuted.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APFloat & getValue() const
Definition: Constants.h:313
const APFloat & getValueAPF() const
Definition: Constants.h:312
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This class represents a range of values.
Definition: ConstantRange.h:47
std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isBigEndian() const
Definition: DataLayout.h:239
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Register getSrcReg() const
Represent a G_FCMP.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
void finishedChangingAllUsesOfReg()
All instructions reported as changing by changingAllUsesOfReg() have finished being changed.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg)
All the instructions using the given register are being changed.
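A hedged sketch of the observer protocol above (mutateOpcode is an illustrative helper, not part of this file): any in-place mutation of an instruction is bracketed by changingInstr()/changedInstr() so listeners such as the combiner worklist stay in sync.

#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

static void mutateOpcode(llvm::GISelChangeObserver &Observer,
                         const llvm::TargetInstrInfo &TII,
                         llvm::MachineInstr &MI, unsigned NewOpc) {
  Observer.changingInstr(MI);   // MI is about to be mutated in place
  MI.setDesc(TII.get(NewOpc));  // swap the descriptor, and thus the opcode
  Observer.changedInstr(MI);    // mutation finished; notify listeners
}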
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
KnownBits getKnownBits(Register R)
APInt getKnownZeroes(Register R)
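A small usage sketch of the GISelKnownBits queries above; fitsInByte is an illustrative helper, and R is assumed to be a scalar register of width BitWidth.

#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Support/KnownBits.h"

// True when the value in R is known to fit in its low 8 bits.
static bool fitsInByte(llvm::GISelKnownBits &KB, llvm::Register R,
                       unsigned BitWidth) {
  llvm::KnownBits Known = KB.getKnownBits(R);
  return BitWidth <= 8 || Known.countMinLeadingZeros() >= BitWidth - 8;
}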
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents a G_IMPLICIT_DEF.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
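An illustrative predicate in the spirit of the load/store combines in this file (isPlainByteSizedAccess is not a function of this file): only simple, byte-sized memory accesses are worth rewriting.

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"

static bool isPlainByteSizedAccess(const llvm::GLoadStore &LdSt) {
  // isSimple() already excludes atomic and volatile accesses.
  return LdSt.isSimple() && LdSt.getMMO().getMemoryType().isByteSized();
}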
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents a sext.
Represents a trunc.
Represents a G_ZEXTLOAD.
Represents a zext.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:182
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition: LowLevelType.h:178
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
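A short sketch of the LLT constructors and queries listed above; the include path shown is the one assumed for LLVM 19, and older releases keep LLT in a different header.

#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/Support/TypeSize.h"

void lltExamples() {
  llvm::LLT S32 = llvm::LLT::scalar(32);
  llvm::LLT V4S32 = llvm::LLT::vector(llvm::ElementCount::getFixed(4), 32);
  (void)S32.isScalar();              // true
  (void)V4S32.isFixedVector();       // true
  (void)V4S32.getNumElements();      // 4
  (void)V4S32.getScalarSizeInBits(); // 32
  (void)V4S32.changeElementSize(64); // <4 x s64>
}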
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
bool isLegalOrCustom(const LegalityQuery &Query) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LLVMContext & getContext() const
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ Src0.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildFDiv(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FDIV Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
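A minimal sketch of the builder API above (buildIsEven is illustrative; the builder's insertion point is assumed to be set and X is assumed to be a scalar register): emit IsEven = (X & 1) == 0.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/InstrTypes.h"

static llvm::Register buildIsEven(llvm::MachineIRBuilder &B, llvm::Register X) {
  llvm::LLT Ty = B.getMRI()->getType(X);
  auto One = B.buildConstant(Ty, 1);                          // G_CONSTANT 1
  auto Lsb = B.buildAnd(Ty, X, One);                          // G_AND X, 1
  auto Zero = B.buildConstant(Ty, 0);                         // G_CONSTANT 0
  auto IsEven = B.buildICmp(llvm::CmpInst::ICMP_EQ,
                            llvm::LLT::scalar(1), Lsb, Zero); // G_ICMP eq (s1 result, pre-legalization)
  return IsEven.getReg(0);
}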
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:396
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:733
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:391
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
void setRegClassOrRegBank(Register Reg, const RegClassOrRegBank &RCOrRB)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
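A hedged sketch of a walk this combiner performs constantly with the MachineRegisterInfo queries above (getSoleUseDef is illustrative): only look through a value's defining instruction when that value has a single non-debug user.

#include "llvm/CodeGen/MachineRegisterInfo.h"

static llvm::MachineInstr *getSoleUseDef(llvm::Register R,
                                         const llvm::MachineRegisterInfo &MRI) {
  if (!MRI.hasOneNonDBGUse(R))
    return nullptr;          // folding would duplicate work for other users
  return MRI.getVRegDef(R);  // may be null for non-virtual registers
}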
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const
Given the generic extension instruction ExtMI, returns true if this extension is a likely candidate f...
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
operand_type_match m_Reg()
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(int64_t RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
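A hedged example of the MIPatternMatch combinators listed above (matchAddOfConstant is illustrative): match Reg = G_ADD X, <constant>, binding both operands.

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

static bool matchAddOfConstant(llvm::Register Reg,
                               const llvm::MachineRegisterInfo &MRI,
                               llvm::Register &X, llvm::APInt &Cst) {
  using namespace llvm::MIPatternMatch;
  // G_ADD is commutative, so this also matches the constant on the left.
  return mi_match(Reg, MRI, m_GAdd(m_Reg(X), m_ICst(Cst)));
}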
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition: Utils.cpp:1427
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1910
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
static double log2(double V)
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:452
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:295
std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1387
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1540
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:727
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to have exactly one bit set when defined.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1510
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1522
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:479
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1555
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition: Utils.cpp:1587
std::function< void(MachineIRBuilder &)> BuildFnTy
std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:658
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1490
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition: Utils.cpp:201
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition: Utils.cpp:1420
std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:947
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition: Utils.cpp:440
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition: Utils.cpp:1612
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:426
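An illustrative use of the constant-lookup helper above (isPow2Constant is not part of this file): fetch the constant behind VReg, looking through copies and extensions, and test it.

#include "llvm/CodeGen/GlobalISel/Utils.h"

static bool isPow2Constant(llvm::Register VReg,
                           const llvm::MachineRegisterInfo &MRI) {
  auto ValAndVReg = llvm::getIConstantVRegValWithLookThrough(VReg, MRI);
  return ValAndVReg && ValAndVReg->Value.isPowerOf2();
}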
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value Register, folding away any copies.
Definition: Utils.cpp:460
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:486
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1405
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:246
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:272
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition: Utils.h:224
Extended Value Type.
Definition: ValueTypes.h:34
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
Definition: KnownBits.cpp:488
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
static std::optional< bool > ne(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_NE result.
Definition: KnownBits.cpp:496
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:536
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:134
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:502
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:542
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:518
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:522
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:546
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:526
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:512
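A sketch of how the KnownBits comparison helpers above are consumed (foldICmpULT is illustrative): each returns std::nullopt when the known bits cannot decide the comparison.

#include <optional>
#include "llvm/Support/KnownBits.h"

static std::optional<bool> foldICmpULT(const llvm::KnownBits &LHS,
                                       const llvm::KnownBits &RHS) {
  if (LHS.isUnknown() && RHS.isUnknown())
    return std::nullopt;                  // no information to work with
  return llvm::KnownBits::ult(LHS, RHS);  // true, false, or std::nullopt
}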
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
MachineInstr * MI
const RegisterBank * Bank
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
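A sketch of the magic-number interface above, in the spirit of the UDIV-by-constant combine (udivMagicExample is illustrative): compute the multiplier and shifts for a 32-bit division by 7.

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"

void udivMagicExample() {
  llvm::APInt Divisor(32, 7);
  auto Magic = llvm::UnsignedDivisionByConstantInfo::get(Divisor);
  (void)Magic.Magic;     // multiplier for the high multiply
  (void)Magic.PreShift;  // shift applied to the dividend first
  (void)Magic.PostShift; // shift applied after the high multiply
  (void)Magic.IsAdd;     // whether the add-based fixup sequence is needed
}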