LLVM 23.0.0git
AArch64PostLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization combines on generic MachineInstrs.
11///
12/// The combines here must preserve instruction legality.
13///
14/// Lowering combines (e.g. pseudo matching) should be handled by
15/// AArch64PostLegalizerLowering.
16///
17/// Combines which don't rely on instruction legality should go in the
18/// AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/STLExtras.h"
40#include "llvm/Support/Debug.h"
41
42#define GET_GICOMBINER_DEPS
43#include "AArch64GenPostLegalizeGICombiner.inc"
44#undef GET_GICOMBINER_DEPS
45
46#define DEBUG_TYPE "aarch64-postlegalizer-combiner"
47
48using namespace llvm;
49using namespace MIPatternMatch;
50
51namespace {
52
53#define GET_GICOMBINER_TYPES
54#include "AArch64GenPostLegalizeGICombiner.inc"
55#undef GET_GICOMBINER_TYPES
56
57/// This combine tries do what performExtractVectorEltCombine does in SDAG.
58/// Rewrite for pairwise fadd pattern
59/// (s32 (g_extract_vector_elt
60/// (g_fadd (vXs32 Other)
61/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
62/// ->
63/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
64/// (g_extract_vector_elt (vXs32 Other) 1))
65bool matchExtractVecEltPairwiseAdd(
67 std::tuple<unsigned, LLT, Register> &MatchInfo) {
68 Register Src1 = MI.getOperand(1).getReg();
69 Register Src2 = MI.getOperand(2).getReg();
70 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
71
72 auto Cst = getIConstantVRegValWithLookThrough(Src2, MRI);
73 if (!Cst || Cst->Value != 0)
74 return false;
75 // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
76
77 // Now check for an fadd operation. TODO: expand this for integer add?
78 auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
79 if (!FAddMI)
80 return false;
81
82 // If we add support for integer add, must restrict these types to just s64.
83 unsigned DstSize = DstTy.getSizeInBits();
84 if (DstSize != 16 && DstSize != 32 && DstSize != 64)
85 return false;
86
87 Register Src1Op1 = FAddMI->getOperand(1).getReg();
88 Register Src1Op2 = FAddMI->getOperand(2).getReg();
89 MachineInstr *Shuffle =
90 getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
91 MachineInstr *Other = MRI.getVRegDef(Src1Op1);
92 if (!Shuffle) {
93 Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
94 Other = MRI.getVRegDef(Src1Op2);
95 }
96
97 // We're looking for a shuffle that moves the second element to index 0.
98 if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
99 Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
100 std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
101 std::get<1>(MatchInfo) = DstTy;
102 std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
103 return true;
104 }
105 return false;
106}
107
108void applyExtractVecEltPairwiseAdd(
110 std::tuple<unsigned, LLT, Register> &MatchInfo) {
111 unsigned Opc = std::get<0>(MatchInfo);
112 assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
113 // We want to generate two extracts of elements 0 and 1, and add them.
114 LLT Ty = std::get<1>(MatchInfo);
115 Register Src = std::get<2>(MatchInfo);
116 LLT s64 = LLT::scalar(64);
117 B.setInstrAndDebugLoc(MI);
118 auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
119 auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
120 B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
121 MI.eraseFromParent();
122}
123
125 // TODO: check if extended build vector as well.
126 unsigned Opc = MRI.getVRegDef(R)->getOpcode();
127 return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
128}
129
131 // TODO: check if extended build vector as well.
132 return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
133}
134
135bool matchAArch64MulConstCombine(
137 std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
138 assert(MI.getOpcode() == TargetOpcode::G_MUL);
139 Register LHS = MI.getOperand(1).getReg();
140 Register RHS = MI.getOperand(2).getReg();
141 Register Dst = MI.getOperand(0).getReg();
142 const LLT Ty = MRI.getType(LHS);
143
144 // The below optimizations require a constant RHS.
146 if (!Const)
147 return false;
148
149 APInt ConstValue = Const->Value.sext(Ty.getSizeInBits());
150 // The following code is ported from AArch64ISelLowering.
151 // Multiplication of a power of two plus/minus one can be done more
152 // cheaply as shift+add/sub. For now, this is true unilaterally. If
153 // future CPUs have a cheaper MADD instruction, this may need to be
154 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
155 // 64-bit is 5 cycles, so this is always a win.
156 // More aggressively, some multiplications N0 * C can be lowered to
157 // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
158 // e.g. 6=3*2=(2+1)*2.
159 // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
160 // which equals to (1+2)*16-(1+2).
161 // TrailingZeroes is used to test if the mul can be lowered to
162 // shift+add+shift.
163 unsigned TrailingZeroes = ConstValue.countr_zero();
164 if (TrailingZeroes) {
165 // Conservatively do not lower to shift+add+shift if the mul might be
166 // folded into smul or umul.
167 if (MRI.hasOneNonDBGUse(LHS) &&
168 (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))
169 return false;
170 // Conservatively do not lower to shift+add+shift if the mul might be
171 // folded into madd or msub.
172 if (MRI.hasOneNonDBGUse(Dst)) {
174 unsigned UseOpc = UseMI.getOpcode();
175 if (UseOpc == TargetOpcode::G_ADD || UseOpc == TargetOpcode::G_PTR_ADD ||
176 UseOpc == TargetOpcode::G_SUB)
177 return false;
178 }
179 }
180 // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
181 // and shift+add+shift.
182 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
183
184 unsigned ShiftAmt, AddSubOpc;
185 // Is the shifted value the LHS operand of the add/sub?
186 bool ShiftValUseIsLHS = true;
187 // Do we need to negate the result?
188 bool NegateResult = false;
189
190 if (ConstValue.isNonNegative()) {
191 // (mul x, 2^N + 1) => (add (shl x, N), x)
192 // (mul x, 2^N - 1) => (sub (shl x, N), x)
193 // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
194 APInt SCVMinus1 = ShiftedConstValue - 1;
195 APInt CVPlus1 = ConstValue + 1;
196 if (SCVMinus1.isPowerOf2()) {
197 ShiftAmt = SCVMinus1.logBase2();
198 AddSubOpc = TargetOpcode::G_ADD;
199 } else if (CVPlus1.isPowerOf2()) {
200 ShiftAmt = CVPlus1.logBase2();
201 AddSubOpc = TargetOpcode::G_SUB;
202 } else
203 return false;
204 } else {
205 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
206 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
207 APInt CVNegPlus1 = -ConstValue + 1;
208 APInt CVNegMinus1 = -ConstValue - 1;
209 if (CVNegPlus1.isPowerOf2()) {
210 ShiftAmt = CVNegPlus1.logBase2();
211 AddSubOpc = TargetOpcode::G_SUB;
212 ShiftValUseIsLHS = false;
213 } else if (CVNegMinus1.isPowerOf2()) {
214 ShiftAmt = CVNegMinus1.logBase2();
215 AddSubOpc = TargetOpcode::G_ADD;
216 NegateResult = true;
217 } else
218 return false;
219 }
220
221 if (NegateResult && TrailingZeroes)
222 return false;
223
224 ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
225 auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);
226 auto ShiftedVal = B.buildShl(Ty, LHS, Shift);
227
228 Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
229 Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
230 auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
231 assert(!(NegateResult && TrailingZeroes) &&
232 "NegateResult and TrailingZeroes cannot both be true for now.");
233 // Negate the result.
234 if (NegateResult) {
235 B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
236 return;
237 }
238 // Shift the result.
239 if (TrailingZeroes) {
240 B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes));
241 return;
242 }
243 B.buildCopy(DstReg, Res.getReg(0));
244 };
245 return true;
246}
247
248void applyAArch64MulConstCombine(
250 std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
251 B.setInstrAndDebugLoc(MI);
252 ApplyFn(B, MI.getOperand(0).getReg());
253 MI.eraseFromParent();
254}
255
256/// Try to fold a G_MERGE_VALUES of 2 s32 sources, where the second source
257/// is a zero, into a G_ZEXT of the first.
258bool matchFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI) {
259 auto &Merge = cast<GMerge>(MI);
260 LLT SrcTy = MRI.getType(Merge.getSourceReg(0));
261 if (SrcTy != LLT::scalar(32) || Merge.getNumSources() != 2)
262 return false;
263 return mi_match(Merge.getSourceReg(1), MRI, m_SpecificICst(0));
264}
265
266void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
268 // Mutate %d(s64) = G_MERGE_VALUES %a(s32), 0(s32)
269 // ->
270 // %d(s64) = G_ZEXT %a(s32)
271 Observer.changingInstr(MI);
272 MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
273 MI.removeOperand(2);
274 Observer.changedInstr(MI);
275}
276
277/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
278/// instruction.
279bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
280 // If this is coming from a scalar compare then we can use a G_ZEXT instead of
281 // a G_ANYEXT:
282 //
283 // %cmp:_(s32) = G_[I|F]CMP ... <-- produces 0/1.
284 // %ext:_(s64) = G_ANYEXT %cmp(s32)
285 //
286 // By doing this, we can leverage more KnownBits combines.
287 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
288 Register Dst = MI.getOperand(0).getReg();
289 Register Src = MI.getOperand(1).getReg();
290 return MRI.getType(Dst).isScalar() &&
291 mi_match(Src, MRI,
293 m_GFCmp(m_Pred(), m_Reg(), m_Reg())));
294}
295
296void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
298 GISelChangeObserver &Observer) {
299 Observer.changingInstr(MI);
300 MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
301 Observer.changedInstr(MI);
302}
303
304/// Match a 128b store of zero and split it into two 64 bit stores, for
305/// size/performance reasons.
306bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
308 if (!Store.isSimple())
309 return false;
310 LLT ValTy = MRI.getType(Store.getValueReg());
311 if (ValTy.isScalableVector())
312 return false;
313 if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)
314 return false;
315 if (Store.getMemSizeInBits() != ValTy.getSizeInBits())
316 return false; // Don't split truncating stores.
317 if (!MRI.hasOneNonDBGUse(Store.getValueReg()))
318 return false;
319 auto MaybeCst = isConstantOrConstantSplatVector(
320 *MRI.getVRegDef(Store.getValueReg()), MRI);
321 return MaybeCst && MaybeCst->isZero();
322}
323
324void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
326 GISelChangeObserver &Observer) {
327 B.setInstrAndDebugLoc(MI);
329 assert(MRI.getType(Store.getValueReg()).isVector() &&
330 "Expected a vector store value");
331 LLT NewTy = LLT::scalar(64);
332 Register PtrReg = Store.getPointerReg();
333 auto Zero = B.buildConstant(NewTy, 0);
334 auto HighPtr = B.buildPtrAdd(MRI.getType(PtrReg), PtrReg,
335 B.buildConstant(LLT::scalar(64), 8));
336 auto &MF = *MI.getMF();
337 auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);
338 auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);
339 B.buildStore(Zero, PtrReg, *LowMMO);
340 B.buildStore(Zero, HighPtr, *HighMMO);
341 Store.eraseFromParent();
342}
343
344bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
345 std::tuple<Register, Register, Register> &MatchInfo) {
346 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
347 if (!DstTy.isVector())
348 return false;
349
350 Register AO1, AO2, BVO1, BVO2;
351 if (!mi_match(MI, MRI,
352 m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)),
353 m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))
354 return false;
355
356 auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
357 auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
358 if (!BV1 || !BV2)
359 return false;
360
361 for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
362 auto ValAndVReg1 =
363 getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);
364 auto ValAndVReg2 =
365 getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);
366 if (!ValAndVReg1 || !ValAndVReg2 ||
367 ValAndVReg1->Value != ~ValAndVReg2->Value)
368 return false;
369 }
370
371 MatchInfo = {AO1, AO2, BVO1};
372 return true;
373}
374
375void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
377 std::tuple<Register, Register, Register> &MatchInfo) {
378 B.setInstrAndDebugLoc(MI);
379 B.buildInstr(
380 AArch64::G_BSP, {MI.getOperand(0).getReg()},
381 {std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});
382 MI.eraseFromParent();
383}
384
385// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz
386bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
387 Register &SrcReg) {
388 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
389
390 if (DstTy != LLT::fixed_vector(2, 64) && DstTy != LLT::fixed_vector(2, 32) &&
391 DstTy != LLT::fixed_vector(4, 32) && DstTy != LLT::fixed_vector(4, 16) &&
392 DstTy != LLT::fixed_vector(8, 16))
393 return false;
394
395 auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
396 if (AndMI->getOpcode() != TargetOpcode::G_AND)
397 return false;
398 auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI);
399 if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
400 return false;
401
402 // Check the constant splat values
404 *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);
406 *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI);
408 *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI);
409 if (!V1.has_value() || !V2.has_value() || !V3.has_value())
410 return false;
411 unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;
412 if (!V1.value().isMask(HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) ||
413 V3 != (HalfSize - 1))
414 return false;
415
416 SrcReg = LShrMI->getOperand(1).getReg();
417
418 return true;
419}
420
421void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
422 MachineIRBuilder &B, Register &SrcReg) {
423 Register DstReg = MI.getOperand(0).getReg();
424 LLT DstTy = MRI.getType(DstReg);
425 LLT HalfTy =
428
429 Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0);
430 Register CastReg =
431 B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0);
432 Register CMLTReg =
433 B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec)
434 .getReg(0);
435
436 B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg}).getReg(0);
437 MI.eraseFromParent();
438}
439
440// Match mul({z/s}ext , {z/s}ext) => {u/s}mull
441bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
443 std::tuple<bool, Register, Register> &MatchInfo) {
444 // Get the instructions that defined the source operand
445 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
446 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
447 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
448 unsigned I1Opc = I1->getOpcode();
449 unsigned I2Opc = I2->getOpcode();
450 unsigned EltSize = DstTy.getScalarSizeInBits();
451
452 if (!DstTy.isVector() || I1->getNumOperands() < 2 || I2->getNumOperands() < 2)
453 return false;
454
455 auto IsAtLeastDoubleExtend = [&](Register R) {
456 LLT Ty = MRI.getType(R);
457 return EltSize >= Ty.getScalarSizeInBits() * 2;
458 };
459
460 // If the source operands were EXTENDED before, then {U/S}MULL can be used
461 bool IsZExt1 =
462 I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_ANYEXT;
463 bool IsZExt2 =
464 I2Opc == TargetOpcode::G_ZEXT || I2Opc == TargetOpcode::G_ANYEXT;
465 if (IsZExt1 && IsZExt2 && IsAtLeastDoubleExtend(I1->getOperand(1).getReg()) &&
466 IsAtLeastDoubleExtend(I2->getOperand(1).getReg())) {
467 get<0>(MatchInfo) = true;
468 get<1>(MatchInfo) = I1->getOperand(1).getReg();
469 get<2>(MatchInfo) = I2->getOperand(1).getReg();
470 return true;
471 }
472
473 bool IsSExt1 =
474 I1Opc == TargetOpcode::G_SEXT || I1Opc == TargetOpcode::G_ANYEXT;
475 bool IsSExt2 =
476 I2Opc == TargetOpcode::G_SEXT || I2Opc == TargetOpcode::G_ANYEXT;
477 if (IsSExt1 && IsSExt2 && IsAtLeastDoubleExtend(I1->getOperand(1).getReg()) &&
478 IsAtLeastDoubleExtend(I2->getOperand(1).getReg())) {
479 get<0>(MatchInfo) = false;
480 get<1>(MatchInfo) = I1->getOperand(1).getReg();
481 get<2>(MatchInfo) = I2->getOperand(1).getReg();
482 return true;
483 }
484
485 // Select UMULL if we can replace the other operand with an extend.
486 APInt Mask = APInt::getHighBitsSet(EltSize, EltSize / 2);
487 if (KB && (IsZExt1 || IsZExt2) &&
488 IsAtLeastDoubleExtend(IsZExt1 ? I1->getOperand(1).getReg()
489 : I2->getOperand(1).getReg())) {
490 Register ZExtOp =
491 IsZExt1 ? MI.getOperand(2).getReg() : MI.getOperand(1).getReg();
492 if (KB->maskedValueIsZero(ZExtOp, Mask)) {
493 get<0>(MatchInfo) = true;
494 get<1>(MatchInfo) = IsZExt1 ? I1->getOperand(1).getReg() : ZExtOp;
495 get<2>(MatchInfo) = IsZExt1 ? ZExtOp : I2->getOperand(1).getReg();
496 return true;
497 }
498 } else if (KB && DstTy == LLT::fixed_vector(2, 64) &&
499 KB->maskedValueIsZero(MI.getOperand(1).getReg(), Mask) &&
500 KB->maskedValueIsZero(MI.getOperand(2).getReg(), Mask)) {
501 get<0>(MatchInfo) = true;
502 get<1>(MatchInfo) = MI.getOperand(1).getReg();
503 get<2>(MatchInfo) = MI.getOperand(2).getReg();
504 return true;
505 }
506
507 if (KB && (IsSExt1 || IsSExt2) &&
508 IsAtLeastDoubleExtend(IsSExt1 ? I1->getOperand(1).getReg()
509 : I2->getOperand(1).getReg())) {
510 Register SExtOp =
511 IsSExt1 ? MI.getOperand(2).getReg() : MI.getOperand(1).getReg();
512 if (KB->computeNumSignBits(SExtOp) > EltSize / 2) {
513 get<0>(MatchInfo) = false;
514 get<1>(MatchInfo) = IsSExt1 ? I1->getOperand(1).getReg() : SExtOp;
515 get<2>(MatchInfo) = IsSExt1 ? SExtOp : I2->getOperand(1).getReg();
516 return true;
517 }
518 } else if (KB && DstTy == LLT::fixed_vector(2, 64) &&
519 KB->computeNumSignBits(MI.getOperand(1).getReg()) > EltSize / 2 &&
520 KB->computeNumSignBits(MI.getOperand(2).getReg()) > EltSize / 2) {
521 get<0>(MatchInfo) = false;
522 get<1>(MatchInfo) = MI.getOperand(1).getReg();
523 get<2>(MatchInfo) = MI.getOperand(2).getReg();
524 return true;
525 }
526
527 return false;
528}
529
530void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
532 std::tuple<bool, Register, Register> &MatchInfo) {
533 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
534 "Expected a G_MUL instruction");
535
536 // Get the instructions that defined the source operand
537 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
538 bool IsZExt = get<0>(MatchInfo);
539 Register Src1Reg = get<1>(MatchInfo);
540 Register Src2Reg = get<2>(MatchInfo);
541 LLT Src1Ty = MRI.getType(Src1Reg);
542 LLT Src2Ty = MRI.getType(Src2Reg);
543 LLT HalfDstTy = DstTy.changeElementSize(DstTy.getScalarSizeInBits() / 2);
544 unsigned ExtOpc = IsZExt ? TargetOpcode::G_ZEXT : TargetOpcode::G_SEXT;
545
546 if (Src1Ty.getScalarSizeInBits() * 2 != DstTy.getScalarSizeInBits())
547 Src1Reg = B.buildExtOrTrunc(ExtOpc, {HalfDstTy}, {Src1Reg}).getReg(0);
548 if (Src2Ty.getScalarSizeInBits() * 2 != DstTy.getScalarSizeInBits())
549 Src2Reg = B.buildExtOrTrunc(ExtOpc, {HalfDstTy}, {Src2Reg}).getReg(0);
550
551 B.buildInstr(IsZExt ? AArch64::G_UMULL : AArch64::G_SMULL,
552 {MI.getOperand(0).getReg()}, {Src1Reg, Src2Reg});
553 MI.eraseFromParent();
554}
555
// Match a reassociation candidate of the form sub(Src, add(Mul1, Mul2)):
// the intermediate value (Sub) must have a single use and both Mul1 and
// Mul2 must be defined by a multiply-like instruction
// (G_MUL/G_SMULL/G_UMULL) so each resulting subtract can fold into msub.
// NOTE(review): two lines of this listing (doxygen lines 560 and 562) were
// lost during extraction: an additional early-out condition (whose
// `return false` survives below) and, presumably, the definition of M1 via
// getDefIgnoringCopies(Mul1, MRI) given M1's later use — confirm against
// upstream before relying on this listing.
556static bool matchSubAddMulReassoc(Register Mul1, Register Mul2, Register Sub,
557 Register Src, MachineRegisterInfo &MRI) {
// The reassociated value must not be used anywhere else.
558 if (!MRI.hasOneUse(Sub))
559 return false;
561 return false;
// First multiply operand must be G_MUL/G_SMULL/G_UMULL.
563 if (M1->getOpcode() != AArch64::G_MUL &&
564 M1->getOpcode() != AArch64::G_SMULL &&
565 M1->getOpcode() != AArch64::G_UMULL)
566 return false;
567 MachineInstr *M2 = getDefIgnoringCopies(Mul2, MRI);
// Second multiply operand must likewise be a multiply.
568 if (M2->getOpcode() != AArch64::G_MUL &&
569 M2->getOpcode() != AArch64::G_SMULL &&
570 M2->getOpcode() != AArch64::G_UMULL)
571 return false;
572 return true;
573}
574
575static void applySubAddMulReassoc(MachineInstr &MI, MachineInstr &Sub,
577 GISelChangeObserver &Observer) {
578 Register Src = MI.getOperand(1).getReg();
579 Register Tmp = MI.getOperand(2).getReg();
580 Register Mul1 = Sub.getOperand(1).getReg();
581 Register Mul2 = Sub.getOperand(2).getReg();
582 Observer.changingInstr(MI);
583 B.buildInstr(AArch64::G_SUB, {Tmp}, {Src, Mul1});
584 MI.getOperand(1).setReg(Tmp);
585 MI.getOperand(2).setReg(Mul2);
586 Sub.eraseFromParent();
587 Observer.changingInstr(MI);
588}
589
590class AArch64PostLegalizerCombinerImpl : public Combiner {
591protected:
592 const CombinerHelper Helper;
593 const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;
594 const AArch64Subtarget &STI;
595
596public:
597 AArch64PostLegalizerCombinerImpl(
598 MachineFunction &MF, CombinerInfo &CInfo, GISelValueTracking &VT,
599 GISelCSEInfo *CSEInfo,
600 const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
601 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
602 const LegalizerInfo *LI);
603
604 static const char *getName() { return "AArch64PostLegalizerCombiner"; }
605
606 bool tryCombineAll(MachineInstr &I) const override;
607
608private:
609#define GET_GICOMBINER_CLASS_MEMBERS
610#include "AArch64GenPostLegalizeGICombiner.inc"
611#undef GET_GICOMBINER_CLASS_MEMBERS
612};
613
614#define GET_GICOMBINER_IMPL
615#include "AArch64GenPostLegalizeGICombiner.inc"
616#undef GET_GICOMBINER_IMPL
617
618AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(
620 GISelCSEInfo *CSEInfo,
621 const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
622 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
623 const LegalizerInfo *LI)
624 : Combiner(MF, CInfo, &VT, CSEInfo),
625 Helper(Observer, B, /*IsPreLegalize*/ false, &VT, MDT, LI),
626 RuleConfig(RuleConfig), STI(STI),
628#include "AArch64GenPostLegalizeGICombiner.inc"
630{
631}
632
633class AArch64PostLegalizerCombiner : public MachineFunctionPass {
634public:
635 static char ID;
636
637 AArch64PostLegalizerCombiner(bool IsOptNone = false);
638
639 StringRef getPassName() const override {
640 return "AArch64PostLegalizerCombiner";
641 }
642
643 bool runOnMachineFunction(MachineFunction &MF) override;
644 void getAnalysisUsage(AnalysisUsage &AU) const override;
645
646private:
647 bool IsOptNone;
648 AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;
649
650
651 struct StoreInfo {
652 GStore *St = nullptr;
653 // The G_PTR_ADD that's used by the store. We keep this to cache the
654 // MachineInstr def.
655 GPtrAdd *Ptr = nullptr;
656 // The signed offset to the Ptr instruction.
657 int64_t Offset = 0;
658 LLT StoredType;
659 };
660 bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
661 CSEMIRBuilder &MIB);
662
663 bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
664 CSEMIRBuilder &MIB);
665};
666} // end anonymous namespace
667
// Declare the analyses this pass requires/preserves. Several statements of
// this listing (doxygen lines 670-672, 674-677, 679) were lost in
// extraction; they presumably registered the required/preserved analyses
// (value tracking, dominator tree, CSE wrapper) — confirm against upstream.
668void AArch64PostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
// This pass only rewrites instructions; the CFG is left untouched.
669 AU.setPreservesCFG();
// The dominator tree is only consumed when optimizing (see
// runOnMachineFunction, which passes MDT = nullptr for -O0).
673 if (!IsOptNone) {
678 }
680}
681
682AArch64PostLegalizerCombiner::AArch64PostLegalizerCombiner(bool IsOptNone)
683 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
684 if (!RuleConfig.parseCommandLineOption())
685 report_fatal_error("Invalid rule identifier");
686}
687
688bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
689 if (MF.getProperties().hasFailedISel())
690 return false;
691 assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
692 const Function &F = MF.getFunction();
693 bool EnableOpt =
694 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
695
697 const auto *LI = ST.getLegalizerInfo();
698
700 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
702 IsOptNone ? nullptr
703 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
705 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
706 auto *CSEInfo =
708
709 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
710 /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
711 F.hasMinSize());
712 // Disable fixed-point iteration to reduce compile-time
713 CInfo.MaxIterations = 1;
714 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
715 // Legalizer performs DCE, so a full DCE pass is unnecessary.
716 CInfo.EnableFullDCE = false;
717 AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, *VT, CSEInfo, RuleConfig, ST,
718 MDT, LI);
719 bool Changed = Impl.combineMachineInstrs();
720
721 auto MIB = CSEMIRBuilder(MF);
722 MIB.setCSEInfo(CSEInfo);
723 Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);
724 return Changed;
725}
726
727bool AArch64PostLegalizerCombiner::tryOptimizeConsecStores(
729 if (Stores.size() <= 2)
730 return false;
731
732 // Profitabity checks:
733 int64_t BaseOffset = Stores[0].Offset;
734 unsigned NumPairsExpected = Stores.size() / 2;
735 unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);
736 // Size savings will depend on whether we can fold the offset, as an
737 // immediate of an ADD.
738 auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
739 if (!TLI.isLegalAddImmediate(BaseOffset))
740 TotalInstsExpected++;
741 int SavingsExpected = Stores.size() - TotalInstsExpected;
742 if (SavingsExpected <= 0)
743 return false;
744
745 auto &MRI = MIB.getMF().getRegInfo();
746
747 // We have a series of consecutive stores. Factor out the common base
748 // pointer and rewrite the offsets.
749 Register NewBase = Stores[0].Ptr->getReg(0);
750 for (auto &SInfo : Stores) {
751 // Compute a new pointer with the new base ptr and adjusted offset.
752 MIB.setInstrAndDebugLoc(*SInfo.St);
753 auto NewOff = MIB.buildConstant(LLT::scalar(64), SInfo.Offset - BaseOffset);
754 auto NewPtr = MIB.buildPtrAdd(MRI.getType(SInfo.St->getPointerReg()),
755 NewBase, NewOff);
756 if (MIB.getObserver())
757 MIB.getObserver()->changingInstr(*SInfo.St);
758 SInfo.St->getOperand(1).setReg(NewPtr.getReg(0));
759 if (MIB.getObserver())
760 MIB.getObserver()->changedInstr(*SInfo.St);
761 }
762 LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
763 << " stores into a base pointer and offsets.\n");
764 return true;
765}
766
767static cl::opt<bool>
768 EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",
769 cl::init(true), cl::Hidden,
770 cl::desc("Enable consecutive memop optimization "
771 "in AArch64PostLegalizerCombiner"));
772
773bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(
774 MachineFunction &MF, CSEMIRBuilder &MIB) {
775 // This combine needs to run after all reassociations/folds on pointer
776 // addressing have been done, specifically those that combine two G_PTR_ADDs
777 // with constant offsets into a single G_PTR_ADD with a combined offset.
778 // The goal of this optimization is to undo that combine in the case where
779 // doing so has prevented the formation of pair stores due to illegal
780 // addressing modes of STP. The reason that we do it here is because
781 // it's much easier to undo the transformation of a series consecutive
782 // mem ops, than it is to detect when doing it would be a bad idea looking
783 // at a single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.
784 //
785 // An example:
786 // G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)
787 // %off1:_(s64) = G_CONSTANT i64 4128
788 // %p1:_(p0) = G_PTR_ADD %0:_, %off1:_(s64)
789 // G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)
790 // %off2:_(s64) = G_CONSTANT i64 4144
791 // %p2:_(p0) = G_PTR_ADD %0:_, %off2:_(s64)
792 // G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)
793 // %off3:_(s64) = G_CONSTANT i64 4160
794 // %p3:_(p0) = G_PTR_ADD %0:_, %off3:_(s64)
795 // G_STORE %11:_(<2 x s64>), %17:_(p0) :: (store (<2 x s64>), align 1)
796 bool Changed = false;
797 auto &MRI = MF.getRegInfo();
798
800 return Changed;
801
803 // If we see a load, then we keep track of any values defined by it.
804 // In the following example, STP formation will fail anyway because
805 // the latter store is using a load result that appears after the
806 // the prior store. In this situation if we factor out the offset then
807 // we increase code size for no benefit.
808 // G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))
809 // %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))
810 // G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))
811 SmallVector<Register> LoadValsSinceLastStore;
812
813 auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {
814 // Check if this store is consecutive to the last one.
815 if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||
816 (Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
817 New.Offset) ||
818 Last.StoredType != New.StoredType)
819 return false;
820
821 // Check if this store is using a load result that appears after the
822 // last store. If so, bail out.
823 if (any_of(LoadValsSinceLastStore, [&](Register LoadVal) {
824 return New.St->getValueReg() == LoadVal;
825 }))
826 return false;
827
828 // Check if the current offset would be too large for STP.
829 // If not, then STP formation should be able to handle it, so we don't
830 // need to do anything.
831 int64_t MaxLegalOffset;
832 switch (New.StoredType.getSizeInBits()) {
833 case 32:
834 MaxLegalOffset = 252;
835 break;
836 case 64:
837 MaxLegalOffset = 504;
838 break;
839 case 128:
840 MaxLegalOffset = 1008;
841 break;
842 default:
843 llvm_unreachable("Unexpected stored type size");
844 }
845 if (New.Offset < MaxLegalOffset)
846 return false;
847
848 // If factoring it out still wouldn't help then don't bother.
849 return New.Offset - Stores[0].Offset <= MaxLegalOffset;
850 };
851
852 auto resetState = [&]() {
853 Stores.clear();
854 LoadValsSinceLastStore.clear();
855 };
856
857 for (auto &MBB : MF) {
858 // We're looking inside a single BB at a time since the memset pattern
859 // should only be in a single block.
860 resetState();
861 for (auto &MI : MBB) {
862 // Skip for scalable vectors
863 if (auto *LdSt = dyn_cast<GLoadStore>(&MI);
864 LdSt && MRI.getType(LdSt->getOperand(0).getReg()).isScalableVector())
865 continue;
866
867 if (auto *St = dyn_cast<GStore>(&MI)) {
868 Register PtrBaseReg;
870 LLT StoredValTy = MRI.getType(St->getValueReg());
871 unsigned ValSize = StoredValTy.getSizeInBits();
872 if (ValSize < 32 || St->getMMO().getSizeInBits() != ValSize)
873 continue;
874
875 Register PtrReg = St->getPointerReg();
876 if (mi_match(
877 PtrReg, MRI,
878 m_OneNonDBGUse(m_GPtrAdd(m_Reg(PtrBaseReg), m_ICst(Offset))))) {
879 GPtrAdd *PtrAdd = cast<GPtrAdd>(MRI.getVRegDef(PtrReg));
880 StoreInfo New = {St, PtrAdd, Offset.getSExtValue(), StoredValTy};
881
882 if (Stores.empty()) {
883 Stores.push_back(New);
884 continue;
885 }
886
887 // Check if this store is a valid continuation of the sequence.
888 auto &Last = Stores.back();
889 if (storeIsValid(Last, New)) {
890 Stores.push_back(New);
891 LoadValsSinceLastStore.clear(); // Reset the load value tracking.
892 } else {
893 // The store isn't a valid to consider for the prior sequence,
894 // so try to optimize what we have so far and start a new sequence.
895 Changed |= tryOptimizeConsecStores(Stores, MIB);
896 resetState();
897 Stores.push_back(New);
898 }
899 }
900 } else if (auto *Ld = dyn_cast<GLoad>(&MI)) {
901 LoadValsSinceLastStore.push_back(Ld->getDstReg());
902 }
903 }
904 Changed |= tryOptimizeConsecStores(Stores, MIB);
905 resetState();
906 }
907
908 return Changed;
909}
910
// Legacy pass registration. One INITIALIZE_PASS_DEPENDENCY line (doxygen
// line 915) was lost during extraction; which analysis it named cannot be
// determined from this listing — confirm against upstream.
911char AArch64PostLegalizerCombiner::ID = 0;
912INITIALIZE_PASS_BEGIN(AArch64PostLegalizerCombiner, DEBUG_TYPE,
913 "Combine AArch64 MachineInstrs after legalization", false,
914 false)
916INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE,
917 "Combine AArch64 MachineInstrs after legalization", false,
918 false)
919
920namespace llvm {
922 return new AArch64PostLegalizerCombiner(IsOptNone);
923}
924} // end namespace llvm
MachineInstrBuilder & UseMI
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
static cl::opt< bool > EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops", cl::init(true), cl::Hidden, cl::desc("Enable consecutive memop optimization " "in AArch64PostLegalizerCombiner"))
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.
This contains common combine transformations that may be used in a combine pass, or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static StringRef getName(Value *V)
R600 Clause Merge
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:114
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
unsigned logBase2() const
Definition APInt.h:1776
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Defines a builder that does CSE of MachineInstructions using GISelCSEInfo.
MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val) override
Build and insert Res = G_CONSTANT Val.
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
Combiner implementation.
Definition Combiner.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
The actual analysis pass wrapper.
Definition CSEInfo.h:242
Simple wrapper that does the following.
Definition CSEInfo.h:212
The CSE Analysis object.
Definition CSEInfo.h:72
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
bool maskedValueIsZero(Register Val, const APInt &Mask)
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
Represents a G_PTR_ADD.
Represents a G_STORE.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr ElementCount getElementCount() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
GISelChangeObserver * getObserver()
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
unsigned getNumOperands() const
Retuns the total number of operands.
const MachineOperand & getOperand(unsigned i) const
ArrayRef< int > getShuffleMask() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition Pass.cpp:85
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
virtual const TargetLowering * getTargetLowering() const
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP > m_GICmp(const Pred &P, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:652
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1589
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:493
LLVM_ABI std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOptLevel Level)
Definition CSEInfo.cpp:85
unsigned M1(unsigned Val)
Definition VE.h:377
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
FunctionPass * createAArch64PostLegalizerCombiner(bool IsOptNone)
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1209
@ Sub
Subtraction of integers.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:432
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...