LLVM 23.0.0git
AArch64PostLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization combines on generic MachineInstrs.
11///
12/// The combines here must preserve instruction legality.
13///
14/// Lowering combines (e.g. pseudo matching) should be handled by
15/// AArch64PostLegalizerLowering.
16///
17/// Combines which don't rely on instruction legality should go in the
18/// AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
22#include "AArch64.h"
24#include "llvm/ADT/STLExtras.h"
43#include "llvm/Support/Debug.h"
44
45#define GET_GICOMBINER_DEPS
46#include "AArch64GenPostLegalizeGICombiner.inc"
47#undef GET_GICOMBINER_DEPS
48
49#define DEBUG_TYPE "aarch64-postlegalizer-combiner"
50
51using namespace llvm;
52using namespace MIPatternMatch;
53
54#define GET_GICOMBINER_TYPES
55#include "AArch64GenPostLegalizeGICombiner.inc"
56#undef GET_GICOMBINER_TYPES
57
58namespace {
59
60/// This combine tries to do what performExtractVectorEltCombine does in SDAG.
61/// Rewrite for pairwise fadd pattern
62/// (s32 (g_extract_vector_elt
63/// (g_fadd (vXs32 Other)
64/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
65/// ->
66/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
67/// (g_extract_vector_elt (vXs32 Other) 1))
///
/// On success \p MatchInfo holds, in order: the opcode to build (G_FADD), the
/// type of the extract's destination, and the register defined by the
/// non-shuffle fadd operand (the vector to extract elements 0 and 1 from).
///
/// \returns true if the pattern matched and \p MatchInfo was filled in.
// NOTE(review): the listing jumps from line 68 to line 70; the parameter line
// declaring MI and MRI (both used below) appears to have been dropped.
68bool matchExtractVecEltPairwiseAdd(
70 std::tuple<unsigned, LLT, Register> &MatchInfo) {
71 Register Src1 = MI.getOperand(1).getReg();
72 Register Src2 = MI.getOperand(2).getReg();
73 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
74
// Only an extract of element 0 (a constant index equal to zero) is handled.
75 auto Cst = getIConstantVRegValWithLookThrough(Src2, MRI);
76 if (!Cst || Cst->Value != 0)
77 return false;
78 // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
79
80 // Now check for an fadd operation. TODO: expand this for integer add?
81 auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
82 if (!FAddMI)
83 return false;
84
85 // If we add support for integer add, must restrict these types to just s64.
86 unsigned DstSize = DstTy.getSizeInBits();
87 if (DstSize != 16 && DstSize != 32 && DstSize != 64)
88 return false;
89
// The shuffle may be either fadd operand: try operand 2 first, then fall back
// to operand 1. "Other" is the definition of whichever operand is not the
// shuffle candidate.
90 Register Src1Op1 = FAddMI->getOperand(1).getReg();
91 Register Src1Op2 = FAddMI->getOperand(2).getReg();
92 MachineInstr *Shuffle =
93 getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
94 MachineInstr *Other = MRI.getVRegDef(Src1Op1);
95 if (!Shuffle) {
96 Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
97 Other = MRI.getVRegDef(Src1Op2);
98 }
99
100 // We're looking for a shuffle that moves the second element to index 0.
// The shuffle's first source must also be defined by the same instruction as
// the other fadd operand.
101 if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
102 Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
103 std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
104 std::get<1>(MatchInfo) = DstTy;
105 std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
106 return true;
107 }
108 return false;
109}
110
/// Rewrite the extract matched by matchExtractVecEltPairwiseAdd: extract
/// elements 0 and 1 of the source vector stored in \p MatchInfo, combine them
/// with the recorded opcode into MI's destination register, and erase MI.
// NOTE(review): the listing jumps from line 111 to line 113; the parameter
// line declaring MI and B (both used below) appears to have been dropped.
111void applyExtractVecEltPairwiseAdd(
113 std::tuple<unsigned, LLT, Register> &MatchInfo) {
114 unsigned Opc = std::get<0>(MatchInfo);
115 assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
116 // We want to generate two extracts of elements 0 and 1, and add them.
117 LLT Ty = std::get<1>(MatchInfo);
118 Register Src = std::get<2>(MatchInfo);
// The element indices are materialized as 64-bit integer constants.
119 LLT s64 = LLT::integer(64);
120 B.setInstrAndDebugLoc(MI);
121 auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
122 auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
123 B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
124 MI.eraseFromParent();
125}
126
// NOTE(review): the function header (line 127) is missing from this listing.
// Judging by the call `isSignExtended(LHS, MRI)` in
// matchAArch64MulConstCombine, this is presumably the body of a helper taking
// a Register R and a MachineRegisterInfo &MRI - confirm against the real file.
// The body returns true when the instruction defining R is a G_SEXT or a
// G_SEXT_INREG.
128 // TODO: check if extended build vector as well.
129 unsigned Opc = MRI.getVRegDef(R)->getOpcode();
130 return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
131}
132
// NOTE(review): the function header (line 133) is missing from this listing.
// Judging by the call `isZeroExtended(LHS, MRI)` in
// matchAArch64MulConstCombine, this is presumably the body of a helper taking
// a Register R and a MachineRegisterInfo &MRI - confirm against the real file.
// The body returns true when the instruction defining R is a G_ZEXT.
134 // TODO: check if extended build vector as well.
135 return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
136}
137
/// Match a G_MUL whose right-hand operand is a constant that can be rewritten
/// as shift + add/sub, optionally followed by a second shift (for constants
/// with trailing zero bits) or a negation of the result.
///
/// On success \p ApplyFn is set to a callback that emits the replacement
/// sequence into the destination register it is given.
///
/// \returns true if the multiply can be rewritten.
// NOTE(review): the listing jumps from line 138 to line 140; the parameter
// line declaring MI and MRI (both used below) appears to have been dropped.
138bool matchAArch64MulConstCombine(
140 std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
141 assert(MI.getOpcode() == TargetOpcode::G_MUL);
142 Register LHS = MI.getOperand(1).getReg();
143 Register RHS = MI.getOperand(2).getReg();
144 Register Dst = MI.getOperand(0).getReg();
145 const LLT Ty = MRI.getType(LHS);
146
147 // The below optimizations require a constant RHS.
148 auto Const = getIConstantVRegValWithLookThrough(RHS, MRI);
149 if (!Const)
150 return false;
151
152 APInt ConstValue = Const->Value.sext(Ty.getSizeInBits());
153 // The following code is ported from AArch64ISelLowering.
154 // Multiplication of a power of two plus/minus one can be done more
155 // cheaply as shift+add/sub. For now, this is true unilaterally. If
156 // future CPUs have a cheaper MADD instruction, this may need to be
157 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
158 // 64-bit is 5 cycles, so this is always a win.
159 // More aggressively, some multiplications N0 * C can be lowered to
160 // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
161 // e.g. 6=3*2=(2+1)*2.
162 // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
163 // which equals to (1+2)*16-(1+2).
164 // TrailingZeroes is used to test if the mul can be lowered to
165 // shift+add+shift.
166 unsigned TrailingZeroes = ConstValue.countr_zero();
167 if (TrailingZeroes) {
168 // Conservatively do not lower to shift+add+shift if the mul might be
169 // folded into smul or umul.
170 if (MRI.hasOneNonDBGUse(LHS) &&
171 (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))
172 return false;
173 // Conservatively do not lower to shift+add+shift if the mul might be
174 // folded into madd or msub.
175 if (MRI.hasOneNonDBGUse(Dst)) {
// NOTE(review): line 176, which defines UseMI (used on the next line), is
// missing from this listing.
177 unsigned UseOpc = UseMI.getOpcode();
178 if (UseOpc == TargetOpcode::G_ADD || UseOpc == TargetOpcode::G_PTR_ADD ||
179 UseOpc == TargetOpcode::G_SUB)
180 return false;
181 }
182 }
183 // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
184 // and shift+add+shift.
185 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
186
// ShiftAmt and AddSubOpc are assigned on every path that does not return
// false below.
187 unsigned ShiftAmt, AddSubOpc;
188 // Is the shifted value the LHS operand of the add/sub?
189 bool ShiftValUseIsLHS = true;
190 // Do we need to negate the result?
191 bool NegateResult = false;
192
193 if (ConstValue.isNonNegative()) {
194 // (mul x, 2^N + 1) => (add (shl x, N), x)
195 // (mul x, 2^N - 1) => (sub (shl x, N), x)
196 // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
197 APInt SCVMinus1 = ShiftedConstValue - 1;
198 APInt CVPlus1 = ConstValue + 1;
199 if (SCVMinus1.isPowerOf2()) {
200 ShiftAmt = SCVMinus1.logBase2();
201 AddSubOpc = TargetOpcode::G_ADD;
202 } else if (CVPlus1.isPowerOf2()) {
203 ShiftAmt = CVPlus1.logBase2();
204 AddSubOpc = TargetOpcode::G_SUB;
205 } else
206 return false;
207 } else {
208 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
209 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
210 APInt CVNegPlus1 = -ConstValue + 1;
211 APInt CVNegMinus1 = -ConstValue - 1;
212 if (CVNegPlus1.isPowerOf2()) {
213 ShiftAmt = CVNegPlus1.logBase2();
214 AddSubOpc = TargetOpcode::G_SUB;
215 ShiftValUseIsLHS = false;
216 } else if (CVNegMinus1.isPowerOf2()) {
217 ShiftAmt = CVNegMinus1.logBase2();
218 AddSubOpc = TargetOpcode::G_ADD;
219 NegateResult = true;
220 } else
221 return false;
222 }
223
// Negation combined with a trailing shift is not supported; the callback
// below asserts the same condition.
224 if (NegateResult && TrailingZeroes)
225 return false;
226
// The callback captures everything by value, so it does not refer to this
// function's locals when it is invoked later.
227 ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
228 auto Shift = B.buildConstant(LLT::integer(64), ShiftAmt);
229 auto ShiftedVal = B.buildShl(Ty, LHS, Shift);
230
231 Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
232 Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
233 auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
234 assert(!(NegateResult && TrailingZeroes) &&
235 "NegateResult and TrailingZeroes cannot both be true for now.");
236 // Negate the result.
237 if (NegateResult) {
238 B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
239 return;
240 }
241 // Shift the result.
242 if (TrailingZeroes) {
243 B.buildShl(DstReg, Res,
244 B.buildConstant(LLT::integer(64), TrailingZeroes));
245 return;
246 }
// Neither negation nor a trailing shift is needed: copy the add/sub result
// into the destination.
247 B.buildCopy(DstReg, Res.getReg(0));
248 };
249 return true;
250}
251
/// Run the callback produced by matchAArch64MulConstCombine at MI's position,
/// targeting MI's destination register, and then erase MI.
// NOTE(review): the listing jumps from line 252 to line 254; the parameter
// line declaring MI and B (both used below) appears to have been dropped.
252void applyAArch64MulConstCombine(
254 std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
255 B.setInstrAndDebugLoc(MI);
256 ApplyFn(B, MI.getOperand(0).getReg());
257 MI.eraseFromParent();
258}
259
260/// Try to fold a G_MERGE_VALUES of 2 s32 sources, where the second source
261/// is a zero, into a G_ZEXT of the first.
262bool matchFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI) {
263 auto &Merge = cast<GMerge>(MI);
264 LLT SrcTy = MRI.getType(Merge.getSourceReg(0));
265 if (SrcTy != LLT::scalar(32) || Merge.getNumSources() != 2)
266 return false;
267 return mi_match(Merge.getSourceReg(1), MRI, m_SpecificICst(0));
268}
269
/// Mutate the matched G_MERGE_VALUES in place into a G_ZEXT: change the
/// opcode descriptor and drop the (zero) operand at index 2, notifying the
/// observer before and after the change.
// NOTE(review): the listing jumps from line 270 to line 272; the parameter
// line declaring B and Observer (both used below) appears to have been
// dropped.
270void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
272 // Mutate %d(s64) = G_MERGE_VALUES %a(s32), 0(s32)
273 // ->
274 // %d(s64) = G_ZEXT %a(s32)
275 Observer.changingInstr(MI);
276 MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
277 MI.removeOperand(2);
278 Observer.changedInstr(MI);
279}
280
281/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
282/// instruction.
283bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
284 // If this is coming from a scalar compare then we can use a G_ZEXT instead of
285 // a G_ANYEXT:
286 //
287 // %cmp:_(s32) = G_[I|F]CMP ... <-- produces 0/1.
288 // %ext:_(s64) = G_ANYEXT %cmp(s32)
289 //
290 // By doing this, we can leverage more KnownBits combines.
291 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
292 Register Dst = MI.getOperand(0).getReg();
293 Register Src = MI.getOperand(1).getReg();
// The destination must be a scalar and the source must match the compare
// pattern below.
294 return MRI.getType(Dst).isScalar() &&
295 mi_match(Src, MRI,
// NOTE(review): line 296 is missing from this listing; given the closing
// parentheses and the comment above, it presumably opens an "any of" matcher
// whose first alternative is an integer compare - confirm against the real
// file.
297 m_GFCmp(m_Pred(), m_Reg(), m_Reg())));
298}
299
/// Mutate \p MI in place into a G_ZEXT by swapping its opcode descriptor,
/// notifying the observer before and after the change.
// NOTE(review): the listing jumps from line 300 to line 302; the parameter
// line declaring B (used below) appears to have been dropped.
300void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
302 GISelChangeObserver &Observer) {
303 Observer.changingInstr(MI);
304 MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
305 Observer.changedInstr(MI);
306}
307
308/// Match a 128b store of zero and split it into two 64 bit stores, for
309/// size/performance reasons.
310bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
311 GStore &Store = cast<GStore>(MI);
312 if (!Store.isSimple())
313 return false;
314 LLT ValTy = MRI.getType(Store.getValueReg());
315 if (ValTy.isScalableVector())
316 return false;
317 if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)
318 return false;
319 if (Store.getMemSizeInBits() != ValTy.getSizeInBits())
320 return false; // Don't split truncating stores.
321 if (!MRI.hasOneNonDBGUse(Store.getValueReg()))
322 return false;
323 auto MaybeCst = isConstantOrConstantSplatVector(
324 *MRI.getVRegDef(Store.getValueReg()), MRI);
325 return MaybeCst && MaybeCst->isZero();
326}
327
/// Replace the matched 128-bit zero store with two 64-bit stores of zero: one
/// at the original pointer and one at the original pointer plus 8. The memory
/// operands of the new stores are derived from the original store's memory
/// operand at offsets 0 and 8. The original store is erased.
// NOTE(review): the listing jumps from line 328 to line 330; the parameter
// line declaring B (used below) appears to have been dropped.
328void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
330 GISelChangeObserver &Observer) {
331 B.setInstrAndDebugLoc(MI);
332 GStore &Store = cast<GStore>(MI);
333 assert(MRI.getType(Store.getValueReg()).isVector() &&
334 "Expected a vector store value");
// NewTy serves both as the type of each stored half and as the type of the
// pointer offset constant.
335 LLT NewTy = LLT::integer(64);
336 Register PtrReg = Store.getPointerReg();
337 auto Zero = B.buildConstant(NewTy, 0);
338 auto HighPtr =
339 B.buildPtrAdd(MRI.getType(PtrReg), PtrReg, B.buildConstant(NewTy, 8));
340 auto &MF = *MI.getMF();
341 auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);
342 auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);
343 B.buildStore(Zero, PtrReg, *LowMMO);
344 B.buildStore(Zero, HighPtr, *HighMMO);
345 Store.eraseFromParent();
346}
347
348bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
349 std::tuple<Register, Register, Register> &MatchInfo) {
350 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
351 if (!DstTy.isVector())
352 return false;
353
354 Register AO1, AO2, BVO1, BVO2;
355 if (!mi_match(MI, MRI,
356 m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)),
357 m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))
358 return false;
359
360 auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
361 auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
362 if (!BV1 || !BV2)
363 return false;
364
365 for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
366 auto ValAndVReg1 =
367 getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);
368 auto ValAndVReg2 =
369 getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);
370 if (!ValAndVReg1 || !ValAndVReg2 ||
371 ValAndVReg1->Value != ~ValAndVReg2->Value)
372 return false;
373 }
374
375 MatchInfo = {AO1, AO2, BVO1};
376 return true;
377}
378
/// Replace the matched G_OR with an AArch64 G_BSP writing MI's destination.
/// The G_BSP operands are, in order, the third, first and second elements of
/// \p MatchInfo. MI is erased afterwards.
// NOTE(review): the listing jumps from line 379 to line 381; the parameter
// line declaring B (used below) appears to have been dropped.
379void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
381 std::tuple<Register, Register, Register> &MatchInfo) {
382 B.setInstrAndDebugLoc(MI);
383 B.buildInstr(
384 AArch64::G_BSP, {MI.getOperand(0).getReg()},
385 {std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});
386 MI.eraseFromParent();
387}
388
389// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz
// On success SrcReg is set to the value being shifted (operand 1 of the
// G_LSHR).
390bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
391 Register &SrcReg) {
392 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
393
// Only these fixed vector result types are handled.
394 if (DstTy != LLT::fixed_vector(2, 64) && DstTy != LLT::fixed_vector(2, 32) &&
395 DstTy != LLT::fixed_vector(4, 32) && DstTy != LLT::fixed_vector(4, 16) &&
396 DstTy != LLT::fixed_vector(8, 16))
397 return false;
398
// Walk up from the multiply's first operand: it must be a G_AND whose first
// operand is a G_LSHR (copies are looked through).
399 auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
400 if (AndMI->getOpcode() != TargetOpcode::G_AND)
401 return false;
402 auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI);
403 if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
404 return false;
405
406 // Check the constant splat values
// NOTE(review): lines 407, 409 and 411 are missing from this listing; they
// presumably declare V1, V2 and V3 (used below) from the second operands of
// the multiply, the and, and the shift respectively - confirm against the
// real file.
408 *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);
410 *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI);
412 *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI);
413 if (!V1.has_value() || !V2.has_value() || !V3.has_value())
414 return false;
// With H = half the element width: V1 must be a mask of the low H bits, V2
// must have exactly bits 0 and H set, and V3 must equal H - 1.
415 unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;
416 if (!V1.value().isMask(HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) ||
417 V3 != (HalfSize - 1))
418 return false;
419
420 SrcReg = LShrMI->getOperand(1).getReg();
421
422 return true;
423}
424
/// Replace the matched multiply: bitcast \p SrcReg to HalfTy, compare it
/// signed-less-than against a zero constant of that type, bitcast the compare
/// result into MI's destination register, and erase MI.
425void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
426 MachineIRBuilder &B, Register &SrcReg) {
427 Register DstReg = MI.getOperand(0).getReg();
428 LLT DstTy = MRI.getType(DstReg);
// NOTE(review): lines 430-431, which hold the initializer of HalfTy, are
// missing from this listing.
429 LLT HalfTy =
432
433 Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0);
434 Register CastReg =
435 B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0);
436 Register CMLTReg =
437 B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec)
438 .getReg(0);
439
440 B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg}).getReg(0);
441 MI.eraseFromParent();
442}
443
444// Match mul({z/s}ext , {z/s}ext) => {u/s}mull
// On success MatchInfo holds, in order: true for the unsigned form (false for
// the signed form), the first source register, and the second source register
// to multiply.
// NOTE(review): the listing jumps from line 445 to line 447; the parameter
// line declaring KB (used below) appears to have been dropped.
445bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
447 std::tuple<bool, Register, Register> &MatchInfo) {
448 // Get the instructions that defined the source operand
449 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
450 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
451 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
452 unsigned I1Opc = I1->getOpcode();
453 unsigned I2Opc = I2->getOpcode();
454 unsigned EltSize = DstTy.getScalarSizeInBits();
455
// Only vector multiplies are handled, and both defining instructions must
// have at least two operands because operand 1 is read from each below.
456 if (!DstTy.isVector() || I1->getNumOperands() < 2 || I2->getNumOperands() < 2)
457 return false;
458
// True if the result element is at least twice as wide as R's element.
459 auto IsAtLeastDoubleExtend = [&](Register R) {
460 LLT Ty = MRI.getType(R);
461 return EltSize >= Ty.getScalarSizeInBits() * 2;
462 };
463
464 // If the source operands were EXTENDED before, then {U/S}MULL can be used
// G_ANYEXT is accepted as both a zero-extend and a sign-extend candidate.
465 bool IsZExt1 =
466 I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_ANYEXT;
467 bool IsZExt2 =
468 I2Opc == TargetOpcode::G_ZEXT || I2Opc == TargetOpcode::G_ANYEXT;
469 if (IsZExt1 && IsZExt2 && IsAtLeastDoubleExtend(I1->getOperand(1).getReg()) &&
470 IsAtLeastDoubleExtend(I2->getOperand(1).getReg())) {
471 get<0>(MatchInfo) = true;
472 get<1>(MatchInfo) = I1->getOperand(1).getReg();
473 get<2>(MatchInfo) = I2->getOperand(1).getReg();
474 return true;
475 }
476
477 bool IsSExt1 =
478 I1Opc == TargetOpcode::G_SEXT || I1Opc == TargetOpcode::G_ANYEXT;
479 bool IsSExt2 =
480 I2Opc == TargetOpcode::G_SEXT || I2Opc == TargetOpcode::G_ANYEXT;
481 if (IsSExt1 && IsSExt2 && IsAtLeastDoubleExtend(I1->getOperand(1).getReg()) &&
482 IsAtLeastDoubleExtend(I2->getOperand(1).getReg())) {
483 get<0>(MatchInfo) = false;
484 get<1>(MatchInfo) = I1->getOperand(1).getReg();
485 get<2>(MatchInfo) = I2->getOperand(1).getReg();
486 return true;
487 }
488
489 // Select UMULL if we can replace the other operand with an extend.
// Mask covers the upper half of each result element; if those bits are known
// zero in the other operand, it can be treated as zero-extended.
490 APInt Mask = APInt::getHighBitsSet(EltSize, EltSize / 2);
491 if (KB && (IsZExt1 || IsZExt2) &&
492 IsAtLeastDoubleExtend(IsZExt1 ? I1->getOperand(1).getReg()
493 : I2->getOperand(1).getReg())) {
494 Register ZExtOp =
495 IsZExt1 ? MI.getOperand(2).getReg() : MI.getOperand(1).getReg();
496 if (KB->maskedValueIsZero(ZExtOp, Mask)) {
497 get<0>(MatchInfo) = true;
498 get<1>(MatchInfo) = IsZExt1 ? I1->getOperand(1).getReg() : ZExtOp;
499 get<2>(MatchInfo) = IsZExt1 ? ZExtOp : I2->getOperand(1).getReg();
500 return true;
501 }
502 } else if (KB && DstTy == LLT::fixed_vector(2, 64) &&
503 KB->maskedValueIsZero(MI.getOperand(1).getReg(), Mask) &&
504 KB->maskedValueIsZero(MI.getOperand(2).getReg(), Mask)) {
// Neither operand is an explicit extend, but both have known-zero upper
// halves: use the multiply's own operands directly.
505 get<0>(MatchInfo) = true;
506 get<1>(MatchInfo) = MI.getOperand(1).getReg();
507 get<2>(MatchInfo) = MI.getOperand(2).getReg();
508 return true;
509 }
510
// Signed counterpart of the above, using the number of known sign bits
// instead of known-zero upper bits.
511 if (KB && (IsSExt1 || IsSExt2) &&
512 IsAtLeastDoubleExtend(IsSExt1 ? I1->getOperand(1).getReg()
513 : I2->getOperand(1).getReg())) {
514 Register SExtOp =
515 IsSExt1 ? MI.getOperand(2).getReg() : MI.getOperand(1).getReg();
516 if (KB->computeNumSignBits(SExtOp) > EltSize / 2) {
517 get<0>(MatchInfo) = false;
518 get<1>(MatchInfo) = IsSExt1 ? I1->getOperand(1).getReg() : SExtOp;
519 get<2>(MatchInfo) = IsSExt1 ? SExtOp : I2->getOperand(1).getReg();
520 return true;
521 }
522 } else if (KB && DstTy == LLT::fixed_vector(2, 64) &&
523 KB->computeNumSignBits(MI.getOperand(1).getReg()) > EltSize / 2 &&
524 KB->computeNumSignBits(MI.getOperand(2).getReg()) > EltSize / 2) {
525 get<0>(MatchInfo) = false;
526 get<1>(MatchInfo) = MI.getOperand(1).getReg();
527 get<2>(MatchInfo) = MI.getOperand(2).getReg();
528 return true;
529 }
530
531 return false;
532}
533
/// Replace the matched G_MUL with G_UMULL or G_SMULL (chosen by the first
/// element of \p MatchInfo) on the two recorded source registers. A source
/// whose element width is not exactly half the destination element width is
/// first extended or truncated to the half-width type. MI is erased.
// NOTE(review): the listing jumps from line 534 to line 536; the parameter
// line declaring B (used below) appears to have been dropped.
534void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
536 std::tuple<bool, Register, Register> &MatchInfo) {
537 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
538 "Expected a G_MUL instruction");
539
540 // Get the instructions that defined the source operand
541 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
542 bool IsZExt = get<0>(MatchInfo);
543 Register Src1Reg = get<1>(MatchInfo);
544 Register Src2Reg = get<2>(MatchInfo);
545 LLT Src1Ty = MRI.getType(Src1Reg);
546 LLT Src2Ty = MRI.getType(Src2Reg);
547 LLT HalfDstTy = DstTy.changeElementSize(DstTy.getScalarSizeInBits() / 2);
548 unsigned ExtOpc = IsZExt ? TargetOpcode::G_ZEXT : TargetOpcode::G_SEXT;
549
// Bring each source to exactly half the destination element width.
550 if (Src1Ty.getScalarSizeInBits() * 2 != DstTy.getScalarSizeInBits())
551 Src1Reg = B.buildExtOrTrunc(ExtOpc, {HalfDstTy}, {Src1Reg}).getReg(0);
552 if (Src2Ty.getScalarSizeInBits() * 2 != DstTy.getScalarSizeInBits())
553 Src2Reg = B.buildExtOrTrunc(ExtOpc, {HalfDstTy}, {Src2Reg}).getReg(0);
554
555 B.buildInstr(IsZExt ? AArch64::G_UMULL : AArch64::G_SMULL,
556 {MI.getOperand(0).getReg()}, {Src1Reg, Src2Reg});
557 MI.eraseFromParent();
558}
559
/// Check the preconditions for the sub/add/mul reassociation: \p Sub must
/// have a single use, and both \p Mul1 and \p Mul2 must be defined (ignoring
/// copies) by a G_MUL, G_SMULL or G_UMULL.
///
/// \returns true if all visible checks pass.
560static bool matchSubAddMulReassoc(Register Mul1, Register Mul2, Register Sub,
561 Register Src, MachineRegisterInfo &MRI) {
562 if (!MRI.hasOneUse(Sub))
563 return false;
// NOTE(review): line 564 (the condition guarding the next return) and line
// 566 (which defines M1, used below) are missing from this listing.
565 return false;
567 if (M1->getOpcode() != AArch64::G_MUL &&
568 M1->getOpcode() != AArch64::G_SMULL &&
569 M1->getOpcode() != AArch64::G_UMULL)
570 return false;
571 MachineInstr *M2 = getDefIgnoringCopies(Mul2, MRI);
572 if (M2->getOpcode() != AArch64::G_MUL &&
573 M2->getOpcode() != AArch64::G_SMULL &&
574 M2->getOpcode() != AArch64::G_UMULL)
575 return false;
576 return true;
577}
578
/// Apply the reassociation: build `Tmp = G_SUB Src, Mul1` (reusing the
/// register that was MI's second operand as the destination), rewrite MI's
/// operands 1 and 2 to Tmp and Mul2, and erase \p Sub. The observer is
/// notified around the in-place change to MI.
// NOTE(review): the listing jumps from line 579 to line 581; the parameter
// line declaring B (used below) appears to have been dropped.
579static void applySubAddMulReassoc(MachineInstr &MI, MachineInstr &Sub,
581 GISelChangeObserver &Observer) {
582 Register Src = MI.getOperand(1).getReg();
583 Register Tmp = MI.getOperand(2).getReg();
584 Register Mul1 = Sub.getOperand(1).getReg();
585 Register Mul2 = Sub.getOperand(2).getReg();
586 Observer.changingInstr(MI);
587 B.buildInstr(AArch64::G_SUB, {Tmp}, {Src, Mul1});
588 MI.getOperand(1).setReg(Tmp);
589 MI.getOperand(2).setReg(Mul2);
590 Sub.eraseFromParent();
591 Observer.changedInstr(MI);
592}
593
/// Combiner implementation for the AArch64 post-legalizer combiner. It holds
/// a CombinerHelper, the rule configuration and the subtarget; the remaining
/// class members come from the TableGen-generated include below.
594class AArch64PostLegalizerCombinerImpl : public Combiner {
595protected:
596 const CombinerHelper Helper;
597 const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;
598 const AArch64Subtarget &STI;
599
600public:
// NOTE(review): the listing jumps from line 601 to line 603; the first
// constructor parameters appear to have been dropped.
601 AArch64PostLegalizerCombinerImpl(
603 GISelCSEInfo *CSEInfo,
604 const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
605 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
606 const LegalizerInfo *LI);
607
608 static const char *getName() { return "AArch64PostLegalizerCombiner"; }
609
610 bool tryCombineAll(MachineInstr &I) const override;
611
612private:
// Generated class members are spliced in here.
613#define GET_GICOMBINER_CLASS_MEMBERS
614#include "AArch64GenPostLegalizeGICombiner.inc"
615#undef GET_GICOMBINER_CLASS_MEMBERS
616};
617
618#define GET_GICOMBINER_IMPL
619#include "AArch64GenPostLegalizeGICombiner.inc"
620#undef GET_GICOMBINER_IMPL
621
// Constructor: forwards to the Combiner base, builds the CombinerHelper in
// post-legalize mode (IsPreLegalize = false), and stores the rule
// configuration and subtarget.
// NOTE(review): lines 623, 631 and 633 are missing from this listing: the
// first parameters (MF, CInfo and VT are used below) and the lines around the
// generated include in the initializer list.
622AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(
624 GISelCSEInfo *CSEInfo,
625 const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
626 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
627 const LegalizerInfo *LI)
628 : Combiner(MF, CInfo, &VT, CSEInfo),
629 Helper(Observer, B, /*IsPreLegalize*/ false, &VT, MDT, LI),
630 RuleConfig(RuleConfig), STI(STI),
632#include "AArch64GenPostLegalizeGICombiner.inc"
634{
635}
636
637struct StoreInfo {
638 GStore *St = nullptr;
639 // The G_PTR_ADD that's used by the store. We keep this to cache the
640 // MachineInstr def.
641 GPtrAdd *Ptr = nullptr;
642 // The signed offset to the Ptr instruction.
643 int64_t Offset = 0;
644 LLT StoredType;
645};
646
647static bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
648 CSEMIRBuilder &MIB) {
649 if (Stores.size() <= 2)
650 return false;
651
652 // Profitabity checks:
653 int64_t BaseOffset = Stores[0].Offset;
654 unsigned NumPairsExpected = Stores.size() / 2;
655 unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);
656 // Size savings will depend on whether we can fold the offset, as an
657 // immediate of an ADD.
658 auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
659 if (!TLI.isLegalAddImmediate(BaseOffset))
660 TotalInstsExpected++;
661 int SavingsExpected = Stores.size() - TotalInstsExpected;
662 if (SavingsExpected <= 0)
663 return false;
664
665 auto &MRI = MIB.getMF().getRegInfo();
666
667 // We have a series of consecutive stores. Factor out the common base
668 // pointer and rewrite the offsets.
669 Register NewBase = Stores[0].Ptr->getReg(0);
670 for (auto &SInfo : Stores) {
671 // Compute a new pointer with the new base ptr and adjusted offset.
672 MIB.setInstrAndDebugLoc(*SInfo.St);
673 auto NewOff =
674 MIB.buildConstant(LLT::integer(64), SInfo.Offset - BaseOffset);
675 auto NewPtr = MIB.buildPtrAdd(MRI.getType(SInfo.St->getPointerReg()),
676 NewBase, NewOff);
677 if (MIB.getObserver())
678 MIB.getObserver()->changingInstr(*SInfo.St);
679 SInfo.St->getOperand(1).setReg(NewPtr.getReg(0));
680 if (MIB.getObserver())
681 MIB.getObserver()->changedInstr(*SInfo.St);
682 }
683 LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
684 << " stores into a base pointer and offsets.\n");
685 return true;
686}
687
// Hidden command-line switch (on by default) for the consecutive memop
// optimization; optimizeConsecutiveMemOpAddressing returns without making
// changes when it is off.
688static cl::opt<bool>
689 EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",
690 cl::init(true), cl::Hidden,
691 cl::desc("Enable consecutive memop optimization "
692 "in AArch64PostLegalizerCombiner"));
693
/// Scan every basic block of \p MF for runs of consecutive stores addressed
/// through G_PTR_ADDs with constant offsets off a common base, and hand each
/// run to tryOptimizeConsecStores.
///
/// \returns true if any store was rewritten.
694static bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
695 CSEMIRBuilder &MIB) {
696 // This combine needs to run after all reassociations/folds on pointer
697 // addressing have been done, specifically those that combine two G_PTR_ADDs
698 // with constant offsets into a single G_PTR_ADD with a combined offset.
699 // The goal of this optimization is to undo that combine in the case where
700 // doing so has prevented the formation of pair stores due to illegal
701 // addressing modes of STP. The reason that we do it here is because
702 // it's much easier to undo the transformation of a series consecutive
703 // mem ops, than it is to detect when doing it would be a bad idea looking
704 // at a single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.
705 //
706 // An example:
707 // G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)
708 // %off1:_(s64) = G_CONSTANT i64 4128
709 // %p1:_(p0) = G_PTR_ADD %0:_, %off1:_(s64)
710 // G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)
711 // %off2:_(s64) = G_CONSTANT i64 4144
712 // %p2:_(p0) = G_PTR_ADD %0:_, %off2:_(s64)
713 // G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)
714 // %off3:_(s64) = G_CONSTANT i64 4160
715 // %p3:_(p0) = G_PTR_ADD %0:_, %off3:_(s64)
716 // G_STORE %11:_(<2 x s64>), %17:_(p0) :: (store (<2 x s64>), align 1)
717 bool Changed = false;
718 auto &MRI = MF.getRegInfo();
719
720 if (!EnableConsecutiveMemOpOpt)
721 return Changed;
722
// NOTE(review): line 723 is missing from this listing; it presumably declares
// the `Stores` container used throughout the rest of this function.
724 // If we see a load, then we keep track of any values defined by it.
725 // In the following example, STP formation will fail anyway because
726 // the latter store is using a load result that appears after the
727 // prior store. In this situation if we factor out the offset then
728 // we increase code size for no benefit.
729 // G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))
730 // %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))
731 // G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))
732 SmallVector<Register> LoadValsSinceLastStore;
733
// Decides whether New may extend the current run that ends with Last.
734 auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {
735 // Check if this store is consecutive to the last one.
736 if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||
737 (Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
738 New.Offset) ||
739 Last.StoredType != New.StoredType)
740 return false;
741
742 // Check if this store is using a load result that appears after the
743 // last store. If so, bail out.
744 if (any_of(LoadValsSinceLastStore, [&](Register LoadVal) {
745 return New.St->getValueReg() == LoadVal;
746 }))
747 return false;
748
749 // Check if the current offset would be too large for STP.
750 // If not, then STP formation should be able to handle it, so we don't
751 // need to do anything.
752 int64_t MaxLegalOffset;
753 switch (New.StoredType.getSizeInBits()) {
754 case 32:
755 MaxLegalOffset = 252;
756 break;
757 case 64:
758 MaxLegalOffset = 504;
759 break;
760 case 128:
761 MaxLegalOffset = 1008;
762 break;
763 default:
764 llvm_unreachable("Unexpected stored type size");
765 }
766 if (New.Offset < MaxLegalOffset)
767 return false;
768
769 // If factoring it out still wouldn't help then don't bother.
770 return New.Offset - Stores[0].Offset <= MaxLegalOffset;
771 };
772
// Forget the current run and the loads seen since its last store.
773 auto resetState = [&]() {
774 Stores.clear();
775 LoadValsSinceLastStore.clear();
776 };
777
778 for (auto &MBB : MF) {
779 // We're looking inside a single BB at a time since the memset pattern
780 // should only be in a single block.
781 resetState();
782 for (auto &MI : MBB) {
783 // Skip for scalable vectors
784 if (auto *LdSt = dyn_cast<GLoadStore>(&MI);
785 LdSt && MRI.getType(LdSt->getOperand(0).getReg()).isScalableVector())
786 continue;
787
788 if (auto *St = dyn_cast<GStore>(&MI)) {
789 Register PtrBaseReg;
// NOTE(review): line 790 is missing from this listing; it presumably declares
// `Offset`, which is bound by m_ICst and read via getSExtValue() below.
791 LLT StoredValTy = MRI.getType(St->getValueReg());
792 unsigned ValSize = StoredValTy.getSizeInBits();
// Ignore stores narrower than 32 bits and stores whose memory size differs
// from the size of the stored value.
793 if (ValSize < 32 || St->getMMO().getSizeInBits() != ValSize)
794 continue;
795
// Only stores whose pointer is a single-use G_PTR_ADD with a constant offset
// are tracked.
796 Register PtrReg = St->getPointerReg();
797 if (mi_match(
798 PtrReg, MRI,
799 m_OneNonDBGUse(m_GPtrAdd(m_Reg(PtrBaseReg), m_ICst(Offset))))) {
800 GPtrAdd *PtrAdd = cast<GPtrAdd>(MRI.getVRegDef(PtrReg));
801 StoreInfo New = {St, PtrAdd, Offset.getSExtValue(), StoredValTy};
802
803 if (Stores.empty()) {
804 Stores.push_back(New);
805 continue;
806 }
807
808 // Check if this store is a valid continuation of the sequence.
809 auto &Last = Stores.back();
810 if (storeIsValid(Last, New)) {
811 Stores.push_back(New);
812 LoadValsSinceLastStore.clear(); // Reset the load value tracking.
813 } else {
814 // The store isn't valid to consider for the prior sequence,
815 // so try to optimize what we have so far and start a new sequence.
816 Changed |= tryOptimizeConsecStores(Stores, MIB);
817 resetState();
818 Stores.push_back(New);
819 }
820 }
821 } else if (auto *Ld = dyn_cast<GLoad>(&MI)) {
822 LoadValsSinceLastStore.push_back(Ld->getDstReg());
823 }
824 }
// Flush whatever run is still pending at the end of the block.
825 Changed |= tryOptimizeConsecStores(Stores, MIB);
826 resetState();
827 }
828
829 return Changed;
830}
831
/// Shared driver for the pass: runs the generated combiner over \p MF (a
/// single iteration, without full DCE), then runs the consecutive memop
/// addressing optimization with a CSE-aware builder.
///
/// \returns true if either step changed the function; false immediately if
/// instruction selection has already failed for \p MF.
// NOTE(review): the listing jumps from line 832 to line 834; the parameter
// line declaring VT and MDT (both used below) appears to have been dropped.
832bool runCombiner(MachineFunction &MF, GISelCSEInfo *CSEInfo,
834 const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
835 bool EnableOpt, bool IsOptNone) {
836 if (MF.getProperties().hasFailedISel())
837 return false;
838 const Function &F = MF.getFunction();
839
// NOTE(review): line 840, which defines ST (used below), is missing from this
// listing.
841 const LegalizerInfo *LI = ST.getLegalizerInfo();
842
843 CombinerInfo CInfo(/*AllowIllegalOps=*/false, /*ShouldLegalizeIllegal=*/false,
844 /*LegalizerInfo=*/LI, EnableOpt, F.hasOptSize(),
845 F.hasMinSize());
846 // Disable fixed-point iteration to reduce compile-time
847 CInfo.MaxIterations = 1;
848 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
849 // Legalizer performs DCE, so a full DCE pass is unnecessary.
850 CInfo.EnableFullDCE = false;
851 AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, *VT, CSEInfo, RuleConfig, ST,
852 MDT, LI);
853 bool Changed = Impl.combineMachineInstrs();
854
// Second phase: rewrite the addressing of consecutive stores.
855 CSEMIRBuilder MIB(MF);
856 MIB.setCSEInfo(CSEInfo);
857 Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);
858 return Changed;
859}
860
861class AArch64PostLegalizerCombinerLegacy : public MachineFunctionPass {
862public:
863 static char ID;
864
865 AArch64PostLegalizerCombinerLegacy(bool IsOptNone = false);
866
867 StringRef getPassName() const override {
868 return "AArch64PostLegalizerCombiner";
869 }
870
871 bool runOnMachineFunction(MachineFunction &MF) override;
872 void getAnalysisUsage(AnalysisUsage &AU) const override;
873
874 MachineFunctionProperties getRequiredProperties() const override {
875 return MachineFunctionProperties().set(
876 MachineFunctionProperties::Property::Legalized);
877 }
878
879private:
880 bool IsOptNone;
881 AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;
882};
883} // end anonymous namespace
884
/// Declare the analyses this pass needs and preserves. Value tracking is
/// always required; the dominator tree and CSE analyses are only requested
/// when the pass is not in opt-none mode.
885void AArch64PostLegalizerCombinerLegacy::getAnalysisUsage(
886 AnalysisUsage &AU) const {
887 AU.setPreservesCFG();
// NOTE(review): lines 888 and 897 are missing from this listing.
889 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
890 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
891 if (!IsOptNone) {
892 AU.addRequired<MachineDominatorTreeWrapperPass>();
893 AU.addPreserved<MachineDominatorTreeWrapperPass>();
894 AU.addRequired<GISelCSEAnalysisWrapperPass>();
895 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
896 }
898}
899
900AArch64PostLegalizerCombinerLegacy::AArch64PostLegalizerCombinerLegacy(
901 bool IsOptNone)
902 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
903 if (!RuleConfig.parseCommandLineOption())
904 reportFatalUsageError("Invalid rule identifier");
905}
906
/// Legacy pass entry point: gathers the analyses and forwards to runCombiner.
/// Returns false without doing anything if instruction selection has already
/// failed for \p MF.
907bool AArch64PostLegalizerCombinerLegacy::runOnMachineFunction(
908 MachineFunction &MF) {
909 if (MF.getProperties().hasFailedISel())
910 return false;
911
912 GISelValueTracking *VT =
913 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
// No dominator tree is fetched in opt-none mode.
914 MachineDominatorTree *MDT =
915 IsOptNone ? nullptr
916 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
917 GISelCSEAnalysisWrapper &Wrapper =
918 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
// NOTE(review): line 920, holding the initializer of CSEInfo, is missing from
// this listing.
919 auto *CSEInfo =
921
// Optimizations are enabled only above -O0 and when the function is not
// skipped.
922 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOptLevel::None &&
923 !skipFunction(MF.getFunction());
924
925 return runCombiner(MF, CSEInfo, VT, MDT, RuleConfig, EnableOpt, IsOptNone);
926}
927
// Definition of the legacy pass ID and its registration with the pass
// registry under DEBUG_TYPE.
// NOTE(review): line 932, between the BEGIN and END macros, is missing from
// this listing.
928char AArch64PostLegalizerCombinerLegacy::ID = 0;
929INITIALIZE_PASS_BEGIN(AArch64PostLegalizerCombinerLegacy, DEBUG_TYPE,
930 "Combine AArch64 MachineInstrs after legalization", false,
931 false)
933INITIALIZE_PASS_END(AArch64PostLegalizerCombinerLegacy, DEBUG_TYPE,
934 "Combine AArch64 MachineInstrs after legalization", false,
935 false)
936
// NOTE(review): the header of this constructor (lines 937-938) is missing
// from this listing; presumably it is the new-pass-manager pass constructor
// taking a target machine `TM` - confirm against the real file.
// The visible part allocates the rule configuration, stores TM, and reports a
// fatal usage error if the rule options on the command line cannot be parsed.
939 : RuleConfig(
940 std::make_unique<AArch64PostLegalizerCombinerImplRuleConfig>()),
941 TM(TM) {
942 if (!RuleConfig->parseCommandLineOption())
943 reportFatalUsageError("invalid rule identifier");
944}
945
948
950
// NOTE(review): the header of this function (lines 951-953) is missing from
// this listing, as are lines 960-961 and 968-971. From the uses below it is
// presumably the new-pass-manager `run(MachineFunction &MF,
// MachineFunctionAnalysisManager &MFAM)` - confirm against the real file.
// The visible part reports all analyses as preserved when instruction
// selection has failed or when runCombiner made no change.
954 if (MF.getProperties().hasFailedISel())
955 return PreservedAnalyses::all();
956
957 const bool IsOptNone = TM->isGlobalISelOptNone();
958 bool EnableOpt = !IsOptNone;
959
// NOTE(review): the declarations of VT and MDT (both used below) are among
// the missing lines; the next line is the tail of the MDT initializer.
962 IsOptNone ? nullptr : &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
963 GISelCSEInfo *CSEInfo = MFAM.getResult<GISelCSEAnalysis>(MF).get();
964
965 if (!runCombiner(MF, CSEInfo, VT, MDT, *RuleConfig, EnableOpt, IsOptNone))
966 return PreservedAnalyses::all();
967
// NOTE(review): the construction of PA (lines 968-971) is missing from this
// listing.
972 return PA;
973}
974
975namespace llvm {
// NOTE(review): the factory function's header (line 976) is missing from this
// listing. The visible body heap-allocates a legacy combiner pass with the
// given IsOptNone flag and returns it.
977 return new AArch64PostLegalizerCombinerLegacy(IsOptNone);
978}
979} // end namespace llvm
MachineInstrBuilder & UseMI
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
aarch64 promote const
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.
This contains common combine transformations that may be used in a combine pass, or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static StringRef getName(Value *V)
R600 Clause Merge
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Value * LHS
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
AArch64PostLegalizerCombinerPass(const AArch64TargetMachine *TM)
Class for arbitrary precision integers.
Definition APInt.h:78
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned logBase2() const
Definition APInt.h:1784
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:275
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Defines a builder that does CSE of MachineInstructions using GISelCSEInfo.
MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val) override
Build and insert Res = G_CONSTANT Val.
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
Combiner implementation.
Definition Combiner.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
The CSE Analysis object.
Definition CSEInfo.h:72
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
bool maskedValueIsZero(Register Val, const APInt &Mask)
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
Represents a G_PTR_ADD.
Represents a G_STORE.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr ElementCount getElementCount() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
static LLT integer(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
GISelChangeObserver * getObserver()
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
void setCSEInfo(GISelCSEInfo *Info)
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
unsigned getNumOperands() const
Returns the total number of operands.
const MachineOperand & getOperand(unsigned i) const
ArrayRef< int > getShuffleMask() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
virtual const TargetLowering * getTargetLowering() const
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP > m_GICmp(const Pred &P, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:656
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1530
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:497
FunctionPass * createAArch64PostLegalizerCombinerLegacy(bool IsOptNone)
LLVM_ABI std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOptLevel Level)
Definition CSEInfo.cpp:85
unsigned M1(unsigned Val)
Definition VE.h:377
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1150
@ Sub
Subtraction of integers.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:436
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...