LLVM 23.0.0git
AArch64PostLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization combines on generic MachineInstrs.
11///
12/// The combines here must preserve instruction legality.
13///
14/// Lowering combines (e.g. pseudo matching) should be handled by
15/// AArch64PostLegalizerLowering.
16///
17/// Combines which don't rely on instruction legality should go in the
18/// AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/STLExtras.h"
41#include "llvm/Support/Debug.h"
42
43#define GET_GICOMBINER_DEPS
44#include "AArch64GenPostLegalizeGICombiner.inc"
45#undef GET_GICOMBINER_DEPS
46
47#define DEBUG_TYPE "aarch64-postlegalizer-combiner"
48
49using namespace llvm;
50using namespace MIPatternMatch;
51
52namespace {
53
54#define GET_GICOMBINER_TYPES
55#include "AArch64GenPostLegalizeGICombiner.inc"
56#undef GET_GICOMBINER_TYPES
57
58/// This combine tries to do what performExtractVectorEltCombine does in SDAG.
59/// Rewrite for pairwise fadd pattern
60/// (s32 (g_extract_vector_elt
61/// (g_fadd (vXs32 Other)
62/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
63/// ->
64/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
65/// (g_extract_vector_elt (vXs32 Other) 1))
66bool matchExtractVecEltPairwiseAdd(
68 std::tuple<unsigned, LLT, Register> &MatchInfo) {
69 Register Src1 = MI.getOperand(1).getReg();
70 Register Src2 = MI.getOperand(2).getReg();
71 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
72
74 if (!Cst || Cst->Value != 0)
75 return false;
76 // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
77
78 // Now check for an fadd operation. TODO: expand this for integer add?
79 auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
80 if (!FAddMI)
81 return false;
82
83 // If we add support for integer add, must restrict these types to just s64.
84 unsigned DstSize = DstTy.getSizeInBits();
85 if (DstSize != 16 && DstSize != 32 && DstSize != 64)
86 return false;
87
88 Register Src1Op1 = FAddMI->getOperand(1).getReg();
89 Register Src1Op2 = FAddMI->getOperand(2).getReg();
90 MachineInstr *Shuffle =
91 getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
92 MachineInstr *Other = MRI.getVRegDef(Src1Op1);
93 if (!Shuffle) {
94 Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
95 Other = MRI.getVRegDef(Src1Op2);
96 }
97
98 // We're looking for a shuffle that moves the second element to index 0.
99 if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
100 Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
101 std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
102 std::get<1>(MatchInfo) = DstTy;
103 std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
104 return true;
105 }
106 return false;
107}
108
109void applyExtractVecEltPairwiseAdd(
111 std::tuple<unsigned, LLT, Register> &MatchInfo) {
112 unsigned Opc = std::get<0>(MatchInfo);
113 assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
114 // We want to generate two extracts of elements 0 and 1, and add them.
115 LLT Ty = std::get<1>(MatchInfo);
116 Register Src = std::get<2>(MatchInfo);
117 LLT s64 = LLT::scalar(64);
118 B.setInstrAndDebugLoc(MI);
119 auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
120 auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
121 B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
122 MI.eraseFromParent();
123}
124
126 // TODO: check if extended build vector as well.
127 unsigned Opc = MRI.getVRegDef(R)->getOpcode();
128 return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
129}
130
132 // TODO: check if extended build vector as well.
133 return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
134}
135
136bool matchAArch64MulConstCombine(
138 std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
139 assert(MI.getOpcode() == TargetOpcode::G_MUL);
140 Register LHS = MI.getOperand(1).getReg();
141 Register RHS = MI.getOperand(2).getReg();
142 Register Dst = MI.getOperand(0).getReg();
143 const LLT Ty = MRI.getType(LHS);
144
145 // The below optimizations require a constant RHS.
147 if (!Const)
148 return false;
149
150 APInt ConstValue = Const->Value.sext(Ty.getSizeInBits());
151 // The following code is ported from AArch64ISelLowering.
152 // Multiplication of a power of two plus/minus one can be done more
153 // cheaply as shift+add/sub. For now, this is true unilaterally. If
154 // future CPUs have a cheaper MADD instruction, this may need to be
155 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
156 // 64-bit is 5 cycles, so this is always a win.
157 // More aggressively, some multiplications N0 * C can be lowered to
158 // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
159 // e.g. 6=3*2=(2+1)*2.
160 // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
161 // which equals to (1+2)*16-(1+2).
162 // TrailingZeroes is used to test if the mul can be lowered to
163 // shift+add+shift.
164 unsigned TrailingZeroes = ConstValue.countr_zero();
165 if (TrailingZeroes) {
166 // Conservatively do not lower to shift+add+shift if the mul might be
167 // folded into smul or umul.
168 if (MRI.hasOneNonDBGUse(LHS) &&
170 return false;
171 // Conservatively do not lower to shift+add+shift if the mul might be
172 // folded into madd or msub.
173 if (MRI.hasOneNonDBGUse(Dst)) {
174 MachineInstr &UseMI = *MRI.use_instr_begin(Dst);
175 unsigned UseOpc = UseMI.getOpcode();
176 if (UseOpc == TargetOpcode::G_ADD || UseOpc == TargetOpcode::G_PTR_ADD ||
177 UseOpc == TargetOpcode::G_SUB)
178 return false;
179 }
180 }
181 // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
182 // and shift+add+shift.
183 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
184
185 unsigned ShiftAmt, AddSubOpc;
186 // Is the shifted value the LHS operand of the add/sub?
187 bool ShiftValUseIsLHS = true;
188 // Do we need to negate the result?
189 bool NegateResult = false;
190
191 if (ConstValue.isNonNegative()) {
192 // (mul x, 2^N + 1) => (add (shl x, N), x)
193 // (mul x, 2^N - 1) => (sub (shl x, N), x)
194 // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
195 APInt SCVMinus1 = ShiftedConstValue - 1;
196 APInt CVPlus1 = ConstValue + 1;
197 if (SCVMinus1.isPowerOf2()) {
198 ShiftAmt = SCVMinus1.logBase2();
199 AddSubOpc = TargetOpcode::G_ADD;
200 } else if (CVPlus1.isPowerOf2()) {
201 ShiftAmt = CVPlus1.logBase2();
202 AddSubOpc = TargetOpcode::G_SUB;
203 } else
204 return false;
205 } else {
206 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
207 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
208 APInt CVNegPlus1 = -ConstValue + 1;
209 APInt CVNegMinus1 = -ConstValue - 1;
210 if (CVNegPlus1.isPowerOf2()) {
211 ShiftAmt = CVNegPlus1.logBase2();
212 AddSubOpc = TargetOpcode::G_SUB;
213 ShiftValUseIsLHS = false;
214 } else if (CVNegMinus1.isPowerOf2()) {
215 ShiftAmt = CVNegMinus1.logBase2();
216 AddSubOpc = TargetOpcode::G_ADD;
217 NegateResult = true;
218 } else
219 return false;
220 }
221
222 if (NegateResult && TrailingZeroes)
223 return false;
224
225 ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
226 auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);
227 auto ShiftedVal = B.buildShl(Ty, LHS, Shift);
228
229 Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
230 Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
231 auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
232 assert(!(NegateResult && TrailingZeroes) &&
233 "NegateResult and TrailingZeroes cannot both be true for now.");
234 // Negate the result.
235 if (NegateResult) {
236 B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
237 return;
238 }
239 // Shift the result.
240 if (TrailingZeroes) {
241 B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes));
242 return;
243 }
244 B.buildCopy(DstReg, Res.getReg(0));
245 };
246 return true;
247}
248
249void applyAArch64MulConstCombine(
251 std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
252 B.setInstrAndDebugLoc(MI);
253 ApplyFn(B, MI.getOperand(0).getReg());
254 MI.eraseFromParent();
255}
256
257/// Try to fold a G_MERGE_VALUES of 2 s32 sources, where the second source
258/// is a zero, into a G_ZEXT of the first.
259bool matchFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI) {
260 auto &Merge = cast<GMerge>(MI);
261 LLT SrcTy = MRI.getType(Merge.getSourceReg(0));
262 if (SrcTy != LLT::scalar(32) || Merge.getNumSources() != 2)
263 return false;
264 return mi_match(Merge.getSourceReg(1), MRI, m_SpecificICst(0));
265}
266
267void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
269 // Mutate %d(s64) = G_MERGE_VALUES %a(s32), 0(s32)
270 // ->
271 // %d(s64) = G_ZEXT %a(s32)
272 Observer.changingInstr(MI);
273 MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
274 MI.removeOperand(2);
275 Observer.changedInstr(MI);
276}
277
278/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
279/// instruction.
280bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
281 // If this is coming from a scalar compare then we can use a G_ZEXT instead of
282 // a G_ANYEXT:
283 //
284 // %cmp:_(s32) = G_[I|F]CMP ... <-- produces 0/1.
285 // %ext:_(s64) = G_ANYEXT %cmp(s32)
286 //
287 // By doing this, we can leverage more KnownBits combines.
288 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
289 Register Dst = MI.getOperand(0).getReg();
290 Register Src = MI.getOperand(1).getReg();
291 return MRI.getType(Dst).isScalar() &&
292 mi_match(Src, MRI,
294 m_GFCmp(m_Pred(), m_Reg(), m_Reg())));
295}
296
297void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
299 GISelChangeObserver &Observer) {
300 Observer.changingInstr(MI);
301 MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
302 Observer.changedInstr(MI);
303}
304
305/// Match a 128b store of zero and split it into two 64 bit stores, for
306/// size/performance reasons.
307bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
309 if (!Store.isSimple())
310 return false;
311 LLT ValTy = MRI.getType(Store.getValueReg());
312 if (ValTy.isScalableVector())
313 return false;
314 if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)
315 return false;
316 if (Store.getMemSizeInBits() != ValTy.getSizeInBits())
317 return false; // Don't split truncating stores.
318 if (!MRI.hasOneNonDBGUse(Store.getValueReg()))
319 return false;
320 auto MaybeCst = isConstantOrConstantSplatVector(
321 *MRI.getVRegDef(Store.getValueReg()), MRI);
322 return MaybeCst && MaybeCst->isZero();
323}
324
325void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
327 GISelChangeObserver &Observer) {
328 B.setInstrAndDebugLoc(MI);
330 assert(MRI.getType(Store.getValueReg()).isVector() &&
331 "Expected a vector store value");
332 LLT NewTy = LLT::scalar(64);
333 Register PtrReg = Store.getPointerReg();
334 auto Zero = B.buildConstant(NewTy, 0);
335 auto HighPtr = B.buildPtrAdd(MRI.getType(PtrReg), PtrReg,
336 B.buildConstant(LLT::scalar(64), 8));
337 auto &MF = *MI.getMF();
338 auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);
339 auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);
340 B.buildStore(Zero, PtrReg, *LowMMO);
341 B.buildStore(Zero, HighPtr, *HighMMO);
342 Store.eraseFromParent();
343}
344
345bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
346 std::tuple<Register, Register, Register> &MatchInfo) {
347 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
348 if (!DstTy.isVector())
349 return false;
350
351 Register AO1, AO2, BVO1, BVO2;
352 if (!mi_match(MI, MRI,
353 m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)),
354 m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))
355 return false;
356
357 auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
358 auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
359 if (!BV1 || !BV2)
360 return false;
361
362 for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
363 auto ValAndVReg1 =
364 getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);
365 auto ValAndVReg2 =
366 getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);
367 if (!ValAndVReg1 || !ValAndVReg2 ||
368 ValAndVReg1->Value != ~ValAndVReg2->Value)
369 return false;
370 }
371
372 MatchInfo = {AO1, AO2, BVO1};
373 return true;
374}
375
376void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
378 std::tuple<Register, Register, Register> &MatchInfo) {
379 B.setInstrAndDebugLoc(MI);
380 B.buildInstr(
381 AArch64::G_BSP, {MI.getOperand(0).getReg()},
382 {std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});
383 MI.eraseFromParent();
384}
385
386// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz
387bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
388 Register &SrcReg) {
389 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
390
391 if (DstTy != LLT::fixed_vector(2, 64) && DstTy != LLT::fixed_vector(2, 32) &&
392 DstTy != LLT::fixed_vector(4, 32) && DstTy != LLT::fixed_vector(4, 16) &&
393 DstTy != LLT::fixed_vector(8, 16))
394 return false;
395
396 auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
397 if (AndMI->getOpcode() != TargetOpcode::G_AND)
398 return false;
399 auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI);
400 if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
401 return false;
402
403 // Check the constant splat values
405 *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);
407 *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI);
409 *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI);
410 if (!V1.has_value() || !V2.has_value() || !V3.has_value())
411 return false;
412 unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;
413 if (!V1.value().isMask(HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) ||
414 V3 != (HalfSize - 1))
415 return false;
416
417 SrcReg = LShrMI->getOperand(1).getReg();
418
419 return true;
420}
421
422void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
423 MachineIRBuilder &B, Register &SrcReg) {
424 Register DstReg = MI.getOperand(0).getReg();
425 LLT DstTy = MRI.getType(DstReg);
426 LLT HalfTy =
429
430 Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0);
431 Register CastReg =
432 B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0);
433 Register CMLTReg =
434 B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec)
435 .getReg(0);
436
437 B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg}).getReg(0);
438 MI.eraseFromParent();
439}
440
441// Match mul({z/s}ext , {z/s}ext) => {u/s}mull
442bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
444 std::tuple<bool, Register, Register> &MatchInfo) {
445 // Get the instructions that defined the source operand
446 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
447 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
448 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
449 unsigned I1Opc = I1->getOpcode();
450 unsigned I2Opc = I2->getOpcode();
451 unsigned EltSize = DstTy.getScalarSizeInBits();
452
453 if (!DstTy.isVector() || I1->getNumOperands() < 2 || I2->getNumOperands() < 2)
454 return false;
455
456 auto IsAtLeastDoubleExtend = [&](Register R) {
457 LLT Ty = MRI.getType(R);
458 return EltSize >= Ty.getScalarSizeInBits() * 2;
459 };
460
461 // If the source operands were EXTENDED before, then {U/S}MULL can be used
462 bool IsZExt1 =
463 I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_ANYEXT;
464 bool IsZExt2 =
465 I2Opc == TargetOpcode::G_ZEXT || I2Opc == TargetOpcode::G_ANYEXT;
466 if (IsZExt1 && IsZExt2 && IsAtLeastDoubleExtend(I1->getOperand(1).getReg()) &&
467 IsAtLeastDoubleExtend(I2->getOperand(1).getReg())) {
468 get<0>(MatchInfo) = true;
469 get<1>(MatchInfo) = I1->getOperand(1).getReg();
470 get<2>(MatchInfo) = I2->getOperand(1).getReg();
471 return true;
472 }
473
474 bool IsSExt1 =
475 I1Opc == TargetOpcode::G_SEXT || I1Opc == TargetOpcode::G_ANYEXT;
476 bool IsSExt2 =
477 I2Opc == TargetOpcode::G_SEXT || I2Opc == TargetOpcode::G_ANYEXT;
478 if (IsSExt1 && IsSExt2 && IsAtLeastDoubleExtend(I1->getOperand(1).getReg()) &&
479 IsAtLeastDoubleExtend(I2->getOperand(1).getReg())) {
480 get<0>(MatchInfo) = false;
481 get<1>(MatchInfo) = I1->getOperand(1).getReg();
482 get<2>(MatchInfo) = I2->getOperand(1).getReg();
483 return true;
484 }
485
486 // Select UMULL if we can replace the other operand with an extend.
487 APInt Mask = APInt::getHighBitsSet(EltSize, EltSize / 2);
488 if (KB && (IsZExt1 || IsZExt2) &&
489 IsAtLeastDoubleExtend(IsZExt1 ? I1->getOperand(1).getReg()
490 : I2->getOperand(1).getReg())) {
491 Register ZExtOp =
492 IsZExt1 ? MI.getOperand(2).getReg() : MI.getOperand(1).getReg();
493 if (KB->maskedValueIsZero(ZExtOp, Mask)) {
494 get<0>(MatchInfo) = true;
495 get<1>(MatchInfo) = IsZExt1 ? I1->getOperand(1).getReg() : ZExtOp;
496 get<2>(MatchInfo) = IsZExt1 ? ZExtOp : I2->getOperand(1).getReg();
497 return true;
498 }
499 } else if (KB && DstTy == LLT::fixed_vector(2, 64) &&
500 KB->maskedValueIsZero(MI.getOperand(1).getReg(), Mask) &&
501 KB->maskedValueIsZero(MI.getOperand(2).getReg(), Mask)) {
502 get<0>(MatchInfo) = true;
503 get<1>(MatchInfo) = MI.getOperand(1).getReg();
504 get<2>(MatchInfo) = MI.getOperand(2).getReg();
505 return true;
506 }
507
508 if (KB && (IsSExt1 || IsSExt2) &&
509 IsAtLeastDoubleExtend(IsSExt1 ? I1->getOperand(1).getReg()
510 : I2->getOperand(1).getReg())) {
511 Register SExtOp =
512 IsSExt1 ? MI.getOperand(2).getReg() : MI.getOperand(1).getReg();
513 if (KB->computeNumSignBits(SExtOp) > EltSize / 2) {
514 get<0>(MatchInfo) = false;
515 get<1>(MatchInfo) = IsSExt1 ? I1->getOperand(1).getReg() : SExtOp;
516 get<2>(MatchInfo) = IsSExt1 ? SExtOp : I2->getOperand(1).getReg();
517 return true;
518 }
519 } else if (KB && DstTy == LLT::fixed_vector(2, 64) &&
520 KB->computeNumSignBits(MI.getOperand(1).getReg()) > EltSize / 2 &&
521 KB->computeNumSignBits(MI.getOperand(2).getReg()) > EltSize / 2) {
522 get<0>(MatchInfo) = false;
523 get<1>(MatchInfo) = MI.getOperand(1).getReg();
524 get<2>(MatchInfo) = MI.getOperand(2).getReg();
525 return true;
526 }
527
528 return false;
529}
530
531void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
533 std::tuple<bool, Register, Register> &MatchInfo) {
534 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
535 "Expected a G_MUL instruction");
536
537 // Get the instructions that defined the source operand
538 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
539 bool IsZExt = get<0>(MatchInfo);
540 Register Src1Reg = get<1>(MatchInfo);
541 Register Src2Reg = get<2>(MatchInfo);
542 LLT Src1Ty = MRI.getType(Src1Reg);
543 LLT Src2Ty = MRI.getType(Src2Reg);
544 LLT HalfDstTy = DstTy.changeElementSize(DstTy.getScalarSizeInBits() / 2);
545 unsigned ExtOpc = IsZExt ? TargetOpcode::G_ZEXT : TargetOpcode::G_SEXT;
546
547 if (Src1Ty.getScalarSizeInBits() * 2 != DstTy.getScalarSizeInBits())
548 Src1Reg = B.buildExtOrTrunc(ExtOpc, {HalfDstTy}, {Src1Reg}).getReg(0);
549 if (Src2Ty.getScalarSizeInBits() * 2 != DstTy.getScalarSizeInBits())
550 Src2Reg = B.buildExtOrTrunc(ExtOpc, {HalfDstTy}, {Src2Reg}).getReg(0);
551
552 B.buildInstr(IsZExt ? AArch64::G_UMULL : AArch64::G_SMULL,
553 {MI.getOperand(0).getReg()}, {Src1Reg, Src2Reg});
554 MI.eraseFromParent();
555}
556
557static bool matchSubAddMulReassoc(Register Mul1, Register Mul2, Register Sub,
559 if (!MRI.hasOneUse(Sub))
560 return false;
562 return false;
564 if (M1->getOpcode() != AArch64::G_MUL &&
565 M1->getOpcode() != AArch64::G_SMULL &&
566 M1->getOpcode() != AArch64::G_UMULL)
567 return false;
569 if (M2->getOpcode() != AArch64::G_MUL &&
570 M2->getOpcode() != AArch64::G_SMULL &&
571 M2->getOpcode() != AArch64::G_UMULL)
572 return false;
573 return true;
574}
575
576static void applySubAddMulReassoc(MachineInstr &MI, MachineInstr &Sub,
578 GISelChangeObserver &Observer) {
579 Register Src = MI.getOperand(1).getReg();
580 Register Tmp = MI.getOperand(2).getReg();
581 Register Mul1 = Sub.getOperand(1).getReg();
582 Register Mul2 = Sub.getOperand(2).getReg();
583 Observer.changingInstr(MI);
584 B.buildInstr(AArch64::G_SUB, {Tmp}, {Src, Mul1});
585 MI.getOperand(1).setReg(Tmp);
586 MI.getOperand(2).setReg(Mul2);
587 Sub.eraseFromParent();
588 Observer.changingInstr(MI);
589}
590
/// Combiner implementation for the AArch64 post-legalizer combine pass.
///
/// Most of this class is produced by TableGen: the generated class members
/// are pulled in from AArch64GenPostLegalizeGICombiner.inc below. The
/// hand-written part only holds the state that is passed in at construction
/// (the shared CombinerHelper, the rule configuration and the subtarget).
class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
  // Helper providing the target-independent combine implementations.
  const CombinerHelper Helper;
  // Rule configuration; held by reference, so it must outlive this object.
  const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;
  // Subtarget the combines are being run for.
  const AArch64Subtarget &STI;

public:
  AArch64PostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
      const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
      const AArch64Subtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  /// Name reported for this combiner.
  static const char *getName() { return "AArch64PostLegalizerCombiner"; }

  /// Entry point for combining a single instruction; the definition is not
  /// written by hand in this file.
  bool tryCombineAll(MachineInstr &I) const override;

private:
  // TableGen-generated member declarations.
#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
};
614
615#define GET_GICOMBINER_IMPL
616#include "AArch64GenPostLegalizeGICombiner.inc"
617#undef GET_GICOMBINER_IMPL
618
619AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(
620 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
621 GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
622 const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
623 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
624 const LegalizerInfo *LI)
625 : Combiner(MF, CInfo, TPC, &VT, CSEInfo),
626 Helper(Observer, B, /*IsPreLegalize*/ false, &VT, MDT, LI),
627 RuleConfig(RuleConfig), STI(STI),
629#include "AArch64GenPostLegalizeGICombiner.inc"
631{
632}
633
/// Machine function pass that runs the AArch64 post-legalizer combines and
/// then the consecutive-store addressing optimization over a function.
class AArch64PostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  /// \param IsOptNone When true, runOnMachineFunction passes a null
  ///                  dominator tree to the combiner instead of querying the
  ///                  dominator tree analysis.
  AArch64PostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AArch64PostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  // Rule configuration; populated from the command line in the constructor.
  AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;


  /// Bookkeeping for one store in a run of candidate consecutive stores.
  struct StoreInfo {
    // The store instruction itself.
    GStore *St = nullptr;
    // The G_PTR_ADD that's used by the store. We keep this to cache the
    // MachineInstr def.
    GPtrAdd *Ptr = nullptr;
    // The signed offset to the Ptr instruction.
    int64_t Offset = 0;
    // Type of the value being stored.
    LLT StoredType;
  };
  /// Rewrite the addresses of the stores in \p Stores so that they are all
  /// expressed as offsets from the pointer of the first store. Does nothing
  /// and returns false when there are two or fewer stores or when no
  /// instruction savings are expected.
  /// \returns true if the stores were rewritten.
  bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
                               CSEMIRBuilder &MIB);

  /// Walk every basic block of \p MF collecting runs of consecutive stores
  /// and hand each run to tryOptimizeConsecStores.
  /// \returns true if any store was rewritten.
  bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
                                          CSEMIRBuilder &MIB);
};
667} // end anonymous namespace
668
669void AArch64PostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
671 AU.setPreservesCFG();
675 if (!IsOptNone) {
680 }
682}
683
684AArch64PostLegalizerCombiner::AArch64PostLegalizerCombiner(bool IsOptNone)
685 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
686 if (!RuleConfig.parseCommandLineOption())
687 report_fatal_error("Invalid rule identifier");
688}
689
690bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
691 if (MF.getProperties().hasFailedISel())
692 return false;
693 assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
694 auto *TPC = &getAnalysis<TargetPassConfig>();
695 const Function &F = MF.getFunction();
696 bool EnableOpt =
697 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
698
700 const auto *LI = ST.getLegalizerInfo();
701
703 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
705 IsOptNone ? nullptr
706 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
708 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
709 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
710
711 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
712 /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
713 F.hasMinSize());
714 // Disable fixed-point iteration to reduce compile-time
715 CInfo.MaxIterations = 1;
716 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
717 // Legalizer performs DCE, so a full DCE pass is unnecessary.
718 CInfo.EnableFullDCE = false;
719 AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo,
720 RuleConfig, ST, MDT, LI);
721 bool Changed = Impl.combineMachineInstrs();
722
723 auto MIB = CSEMIRBuilder(MF);
724 MIB.setCSEInfo(CSEInfo);
725 Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);
726 return Changed;
727}
728
729bool AArch64PostLegalizerCombiner::tryOptimizeConsecStores(
731 if (Stores.size() <= 2)
732 return false;
733
734 // Profitability checks:
735 int64_t BaseOffset = Stores[0].Offset;
736 unsigned NumPairsExpected = Stores.size() / 2;
737 unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);
738 // Size savings will depend on whether we can fold the offset, as an
739 // immediate of an ADD.
740 auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
741 if (!TLI.isLegalAddImmediate(BaseOffset))
742 TotalInstsExpected++;
743 int SavingsExpected = Stores.size() - TotalInstsExpected;
744 if (SavingsExpected <= 0)
745 return false;
746
747 auto &MRI = MIB.getMF().getRegInfo();
748
749 // We have a series of consecutive stores. Factor out the common base
750 // pointer and rewrite the offsets.
751 Register NewBase = Stores[0].Ptr->getReg(0);
752 for (auto &SInfo : Stores) {
753 // Compute a new pointer with the new base ptr and adjusted offset.
754 MIB.setInstrAndDebugLoc(*SInfo.St);
755 auto NewOff = MIB.buildConstant(LLT::scalar(64), SInfo.Offset - BaseOffset);
756 auto NewPtr = MIB.buildPtrAdd(MRI.getType(SInfo.St->getPointerReg()),
757 NewBase, NewOff);
758 if (MIB.getObserver())
759 MIB.getObserver()->changingInstr(*SInfo.St);
760 SInfo.St->getOperand(1).setReg(NewPtr.getReg(0));
761 if (MIB.getObserver())
762 MIB.getObserver()->changedInstr(*SInfo.St);
763 }
764 LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
765 << " stores into a base pointer and offsets.\n");
766 return true;
767}
768
// Hidden command-line switch for the consecutive memop optimization in this
// pass; enabled by default.
static cl::opt<bool>
    EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",
                              cl::init(true), cl::Hidden,
                              cl::desc("Enable consecutive memop optimization "
                                       "in AArch64PostLegalizerCombiner"));
774
775bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(
776 MachineFunction &MF, CSEMIRBuilder &MIB) {
777 // This combine needs to run after all reassociations/folds on pointer
778 // addressing have been done, specifically those that combine two G_PTR_ADDs
779 // with constant offsets into a single G_PTR_ADD with a combined offset.
780 // The goal of this optimization is to undo that combine in the case where
781 // doing so has prevented the formation of pair stores due to illegal
782 // addressing modes of STP. The reason that we do it here is because
783 // it's much easier to undo the transformation of a series consecutive
784 // mem ops, than it is to detect when doing it would be a bad idea looking
785 // at a single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.
786 //
787 // An example:
788 // G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)
789 // %off1:_(s64) = G_CONSTANT i64 4128
790 // %p1:_(p0) = G_PTR_ADD %0:_, %off1:_(s64)
791 // G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)
792 // %off2:_(s64) = G_CONSTANT i64 4144
793 // %p2:_(p0) = G_PTR_ADD %0:_, %off2:_(s64)
794 // G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)
795 // %off3:_(s64) = G_CONSTANT i64 4160
796 // %p3:_(p0) = G_PTR_ADD %0:_, %off3:_(s64)
797 // G_STORE %11:_(<2 x s64>), %17:_(p0) :: (store (<2 x s64>), align 1)
798 bool Changed = false;
799 auto &MRI = MF.getRegInfo();
800
802 return Changed;
803
805 // If we see a load, then we keep track of any values defined by it.
806 // In the following example, STP formation will fail anyway because
807 // the latter store is using a load result that appears after the
808 // the prior store. In this situation if we factor out the offset then
809 // we increase code size for no benefit.
810 // G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))
811 // %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))
812 // G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))
813 SmallVector<Register> LoadValsSinceLastStore;
814
815 auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {
816 // Check if this store is consecutive to the last one.
817 if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||
818 (Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
819 New.Offset) ||
820 Last.StoredType != New.StoredType)
821 return false;
822
823 // Check if this store is using a load result that appears after the
824 // last store. If so, bail out.
825 if (any_of(LoadValsSinceLastStore, [&](Register LoadVal) {
826 return New.St->getValueReg() == LoadVal;
827 }))
828 return false;
829
830 // Check if the current offset would be too large for STP.
831 // If not, then STP formation should be able to handle it, so we don't
832 // need to do anything.
833 int64_t MaxLegalOffset;
834 switch (New.StoredType.getSizeInBits()) {
835 case 32:
836 MaxLegalOffset = 252;
837 break;
838 case 64:
839 MaxLegalOffset = 504;
840 break;
841 case 128:
842 MaxLegalOffset = 1008;
843 break;
844 default:
845 llvm_unreachable("Unexpected stored type size");
846 }
847 if (New.Offset < MaxLegalOffset)
848 return false;
849
850 // If factoring it out still wouldn't help then don't bother.
851 return New.Offset - Stores[0].Offset <= MaxLegalOffset;
852 };
853
854 auto resetState = [&]() {
855 Stores.clear();
856 LoadValsSinceLastStore.clear();
857 };
858
859 for (auto &MBB : MF) {
860 // We're looking inside a single BB at a time since the memset pattern
861 // should only be in a single block.
862 resetState();
863 for (auto &MI : MBB) {
864 // Skip for scalable vectors
865 if (auto *LdSt = dyn_cast<GLoadStore>(&MI);
866 LdSt && MRI.getType(LdSt->getOperand(0).getReg()).isScalableVector())
867 continue;
868
869 if (auto *St = dyn_cast<GStore>(&MI)) {
870 Register PtrBaseReg;
872 LLT StoredValTy = MRI.getType(St->getValueReg());
873 unsigned ValSize = StoredValTy.getSizeInBits();
874 if (ValSize < 32 || St->getMMO().getSizeInBits() != ValSize)
875 continue;
876
877 Register PtrReg = St->getPointerReg();
878 if (mi_match(
879 PtrReg, MRI,
880 m_OneNonDBGUse(m_GPtrAdd(m_Reg(PtrBaseReg), m_ICst(Offset))))) {
881 GPtrAdd *PtrAdd = cast<GPtrAdd>(MRI.getVRegDef(PtrReg));
882 StoreInfo New = {St, PtrAdd, Offset.getSExtValue(), StoredValTy};
883
884 if (Stores.empty()) {
885 Stores.push_back(New);
886 continue;
887 }
888
889 // Check if this store is a valid continuation of the sequence.
890 auto &Last = Stores.back();
891 if (storeIsValid(Last, New)) {
892 Stores.push_back(New);
893 LoadValsSinceLastStore.clear(); // Reset the load value tracking.
894 } else {
895 // The store isn't valid to consider for the prior sequence,
896 // so try to optimize what we have so far and start a new sequence.
897 Changed |= tryOptimizeConsecStores(Stores, MIB);
898 resetState();
899 Stores.push_back(New);
900 }
901 }
902 } else if (auto *Ld = dyn_cast<GLoad>(&MI)) {
903 LoadValsSinceLastStore.push_back(Ld->getDstReg());
904 }
905 }
906 Changed |= tryOptimizeConsecStores(Stores, MIB);
907 resetState();
908 }
909
910 return Changed;
911}
912
913char AArch64PostLegalizerCombiner::ID = 0;
914INITIALIZE_PASS_BEGIN(AArch64PostLegalizerCombiner, DEBUG_TYPE,
915 "Combine AArch64 MachineInstrs after legalization", false,
916 false)
919INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE,
920 "Combine AArch64 MachineInstrs after legalization", false,
921 false)
922
923namespace llvm {
925 return new AArch64PostLegalizerCombiner(IsOptNone);
926}
927} // end namespace llvm
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
static cl::opt< bool > EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops", cl::init(true), cl::Hidden, cl::desc("Enable consecutive memop optimization " "in AArch64PostLegalizerCombiner"))
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.
This contains common combine transformations that may be used in a combine pass, or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static StringRef getName(Value *V)
R600 Clause Merge
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:114
Target-Independent Code Generator Pass Configuration Options pass.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
unsigned logBase2() const
Definition APInt.h:1776
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Defines a builder that does CSE of MachineInstructions using GISelCSEInfo.
MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val) override
Build and insert Res = G_CONSTANT Val.
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
Combiner implementation.
Definition Combiner.h:34
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
The actual analysis pass wrapper.
Definition CSEInfo.h:242
Simple wrapper that does the following.
Definition CSEInfo.h:212
The CSE Analysis object.
Definition CSEInfo.h:72
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
bool maskedValueIsZero(Register Val, const APInt &Mask)
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
Represents a G_PTR_ADD.
Represents a G_STORE.
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
GISelChangeObserver * getObserver()
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
unsigned getNumOperands() const
Returns the total number of operands.
const MachineOperand & getOperand(unsigned i) const
ArrayRef< int > getShuffleMask() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition Pass.cpp:85
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
virtual const TargetLowering * getTargetLowering() const
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP > m_GICmp(const Pred &P, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:653
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1566
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:494
unsigned M1(unsigned Val)
Definition VE.h:377
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
FunctionPass * createAArch64PostLegalizerCombiner(bool IsOptNone)
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1186
@ Sub
Subtraction of integers.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...