AArch64PreLegalizerCombiner.cpp (LLVM 22.0.0git)
1//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
31
32#define GET_GICOMBINER_DEPS
33#include "AArch64GenPreLegalizeGICombiner.inc"
34#undef GET_GICOMBINER_DEPS
35
36#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
37
38using namespace llvm;
39using namespace MIPatternMatch;
40
41namespace {
42
43#define GET_GICOMBINER_TYPES
44#include "AArch64GenPreLegalizeGICombiner.inc"
45#undef GET_GICOMBINER_TYPES
46
47/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
48/// are sign bits. In this case, we can transform the G_ICMP to directly compare
49/// the wide value with a zero.
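///
/// Illustrative MIR sketch (register names and sizes are hypothetical):
///   %t:_(s32) = G_TRUNC %x:_(s64)
///   %c:_(s1) = G_ICMP intpred(eq), %t(s32), %zero32
/// becomes, when the truncated-away bits of %x are all sign bits:
///   %c:_(s1) = G_ICMP intpred(eq), %x(s64), %zero64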
50bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
51 GISelValueTracking *VT, Register &MatchInfo) {
52 assert(MI.getOpcode() == TargetOpcode::G_ICMP && VT);
53
54 auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
55 if (!ICmpInst::isEquality(Pred))
56 return false;
57
58 Register LHS = MI.getOperand(2).getReg();
59 LLT LHSTy = MRI.getType(LHS);
60 if (!LHSTy.isScalar())
61 return false;
62
63 Register RHS = MI.getOperand(3).getReg();
64 Register WideReg;
65
66 if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
67 !mi_match(RHS, MRI, m_SpecificICst(0)))
68 return false;
69
70 LLT WideTy = MRI.getType(WideReg);
71 if (VT->computeNumSignBits(WideReg) <=
72 WideTy.getSizeInBits() - LHSTy.getSizeInBits())
73 return false;
74
75 MatchInfo = WideReg;
76 return true;
77}
78
79void applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
80 MachineIRBuilder &Builder,
81 GISelChangeObserver &Observer, Register &WideReg) {
82 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
83
84 LLT WideTy = MRI.getType(WideReg);
85 // We're going to directly use the wide register as the LHS, and then use an
86 // equivalent size zero for RHS.
87 Builder.setInstrAndDebugLoc(MI);
88 auto WideZero = Builder.buildConstant(WideTy, 0);
89 Observer.changingInstr(MI);
90 MI.getOperand(2).setReg(WideReg);
91 MI.getOperand(3).setReg(WideZero.getReg(0));
92 Observer.changedInstr(MI);
93}
94
95/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
96///
97/// e.g.
98///
99/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
100bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
101 std::pair<uint64_t, uint64_t> &MatchInfo) {
102 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
103 MachineFunction &MF = *MI.getMF();
104 auto &GlobalOp = MI.getOperand(1);
105 auto *GV = GlobalOp.getGlobal();
106 if (GV->isThreadLocal())
107 return false;
108
109 // Don't allow anything that could represent offsets etc.
110 if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
111 GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
112 return false;
113
114 // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
115 //
116 // %g = G_GLOBAL_VALUE @x
117 // %ptr1 = G_PTR_ADD %g, cst1
118 // %ptr2 = G_PTR_ADD %g, cst2
119 // ...
120 // %ptrN = G_PTR_ADD %g, cstN
121 //
122 // Identify the *smallest* constant. We want to be able to form this:
123 //
124 // %offset_g = G_GLOBAL_VALUE @x + min_cst
125 // %g = G_PTR_ADD %offset_g, -min_cst
126 // %ptr1 = G_PTR_ADD %g, cst1
127 // ...
128 Register Dst = MI.getOperand(0).getReg();
129 uint64_t MinOffset = -1ull;
130 for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
131 if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
132 return false;
133 auto Cst = getIConstantVRegValWithLookThrough(
134 UseInstr.getOperand(2).getReg(), MRI);
135 if (!Cst)
136 return false;
137 MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
138 }
139
140 // Require that the new offset is larger than the existing one to avoid
141 // infinite loops.
142 uint64_t CurrOffset = GlobalOp.getOffset();
143 uint64_t NewOffset = MinOffset + CurrOffset;
144 if (NewOffset <= CurrOffset)
145 return false;
146
147 // Check whether folding this offset is legal. It must not go out of bounds of
148 // the referenced object to avoid violating the code model, and must be
149 // smaller than 2^20 because this is the largest offset expressible in all
150 // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
151 // stores an immediate signed 21 bit offset.)
152 //
153 // This check also prevents us from folding negative offsets, which will end
154 // up being treated in the same way as large positive ones. They could also
155 // cause code model violations, and aren't really common enough to matter.
156 if (NewOffset >= (1 << 20))
157 return false;
158
159 Type *T = GV->getValueType();
160 if (!T->isSized() ||
161 NewOffset > GV->getDataLayout().getTypeAllocSize(T))
162 return false;
163 MatchInfo = std::make_pair(NewOffset, MinOffset);
164 return true;
165}
166
167 void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
168 MachineIRBuilder &B, GISelChangeObserver &Observer,
169 std::pair<uint64_t, uint64_t> &MatchInfo) {
170 // Change:
171 //
172 // %g = G_GLOBAL_VALUE @x
173 // %ptr1 = G_PTR_ADD %g, cst1
174 // %ptr2 = G_PTR_ADD %g, cst2
175 // ...
176 // %ptrN = G_PTR_ADD %g, cstN
177 //
178 // To:
179 //
180 // %offset_g = G_GLOBAL_VALUE @x + min_cst
181 // %g = G_PTR_ADD %offset_g, -min_cst
182 // %ptr1 = G_PTR_ADD %g, cst1
183 // ...
184 // %ptrN = G_PTR_ADD %g, cstN
185 //
186 // Then, the original G_PTR_ADDs should be folded later on so that they look
187 // like this:
188 //
189 // %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
190 uint64_t Offset, MinOffset;
191 std::tie(Offset, MinOffset) = MatchInfo;
192 B.setInstrAndDebugLoc(*std::next(MI.getIterator()));
193 Observer.changingInstr(MI);
194 auto &GlobalOp = MI.getOperand(1);
195 auto *GV = GlobalOp.getGlobal();
196 GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
197 Register Dst = MI.getOperand(0).getReg();
198 Register NewGVDst = MRI.cloneVirtualRegister(Dst);
199 MI.getOperand(0).setReg(NewGVDst);
200 Observer.changedInstr(MI);
201 B.buildPtrAdd(
202 Dst, NewGVDst,
203 B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
204}
205
206// Combines vecreduce_add(mul(ext(x), ext(y))) -> vecreduce_add([us]dot(x, y))
207// Or vecreduce_add(ext(mul(ext(x), ext(y)))) -> vecreduce_add([us]dot(x, y))
208// Or vecreduce_add(ext(x)) -> vecreduce_add([us]dot(x, 1))
209// Similar to performVecReduceAddCombine in SelectionDAG
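//
// Illustrative sketch for v16i8 inputs (hypothetical registers):
//   %ax:_(<16 x s32>) = G_SEXT %a:_(<16 x s8>)
//   %bx:_(<16 x s32>) = G_SEXT %b:_(<16 x s8>)
//   %m:_(<16 x s32>) = G_MUL %ax, %bx
//   %sum:_(s32) = G_VECREDUCE_ADD %m
// becomes
//   %zeroes:_(<4 x s32>) = zero vector
//   %dot:_(<4 x s32>) = G_SDOT %zeroes, %a, %b
//   %sum:_(s32) = G_VECREDUCE_ADD %dot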
210bool matchExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
211 const AArch64Subtarget &STI,
212 std::tuple<Register, Register, bool> &MatchInfo) {
213 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
214 "Expected a G_VECREDUCE_ADD instruction");
215 assert(STI.hasDotProd() && "Target should have Dot Product feature");
216
217 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
218 Register DstReg = MI.getOperand(0).getReg();
219 Register MidReg = I1->getOperand(0).getReg();
220 LLT DstTy = MRI.getType(DstReg);
221 LLT MidTy = MRI.getType(MidReg);
222 if (DstTy.getScalarSizeInBits() != 32 || MidTy.getScalarSizeInBits() != 32)
223 return false;
224
225 // Detect mul(ext, ext) with symmetric extends. If I1Opc is already G_ZEXT or
226 // G_SEXT, both extends must use that same opcode. On return, I1Opc is set to
227 // the opcode of the matched extends.
228 auto tryMatchingMulOfExt = [&MRI](MachineInstr *MI, Register &Out1,
229 Register &Out2, unsigned &I1Opc) {
230 // If the result has more than one use, there is no point in creating
231 // a dot instruction.
232 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
233 return false;
234
235 MachineInstr *ExtMI1 =
236 getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI);
237 MachineInstr *ExtMI2 =
238 getDefIgnoringCopies(MI->getOperand(2).getReg(), MRI);
239 LLT Ext1DstTy = MRI.getType(ExtMI1->getOperand(0).getReg());
240 LLT Ext2DstTy = MRI.getType(ExtMI2->getOperand(0).getReg());
241
242 if (ExtMI1->getOpcode() != ExtMI2->getOpcode() || Ext1DstTy != Ext2DstTy)
243 return false;
244 if ((I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) &&
245 I1Opc != ExtMI1->getOpcode())
246 return false;
247 Out1 = ExtMI1->getOperand(1).getReg();
248 Out2 = ExtMI2->getOperand(1).getReg();
249 I1Opc = ExtMI1->getOpcode();
250 return true;
251 };
252
253 LLT SrcTy;
254 unsigned I1Opc = I1->getOpcode();
255 if (I1Opc == TargetOpcode::G_MUL) {
256 Register Out1, Out2;
257 if (!tryMatchingMulOfExt(I1, Out1, Out2, I1Opc))
258 return false;
259 SrcTy = MRI.getType(Out1);
260 std::get<0>(MatchInfo) = Out1;
261 std::get<1>(MatchInfo) = Out2;
262 } else if (I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) {
263 Register I1Op = I1->getOperand(1).getReg();
264 MachineInstr *M = getDefIgnoringCopies(I1Op, MRI);
265 Register Out1, Out2;
266 if (M->getOpcode() == TargetOpcode::G_MUL &&
267 tryMatchingMulOfExt(M, Out1, Out2, I1Opc)) {
268 SrcTy = MRI.getType(Out1);
269 std::get<0>(MatchInfo) = Out1;
270 std::get<1>(MatchInfo) = Out2;
271 } else {
272 SrcTy = MRI.getType(I1Op);
273 std::get<0>(MatchInfo) = I1Op;
274 std::get<1>(MatchInfo) = 0;
275 }
276 } else {
277 return false;
278 }
279
280 if (I1Opc == TargetOpcode::G_ZEXT)
281 std::get<2>(MatchInfo) = 0;
282 else if (I1Opc == TargetOpcode::G_SEXT)
283 std::get<2>(MatchInfo) = 1;
284 else
285 return false;
286
287 if (SrcTy.getScalarSizeInBits() != 8 || SrcTy.getNumElements() % 8 != 0)
288 return false;
289
290 return true;
291}
292
293void applyExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
294 MachineIRBuilder &Builder,
295 GISelChangeObserver &Observer,
296 const AArch64Subtarget &STI,
297 std::tuple<Register, Register, bool> &MatchInfo) {
298 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
299 "Expected a G_VECREDUCE_ADD instruction");
300 assert(STI.hasDotProd() && "Target should have Dot Product feature");
301
302 // Initialise the variables
303 unsigned DotOpcode =
304 std::get<2>(MatchInfo) ? AArch64::G_SDOT : AArch64::G_UDOT;
305 Register Ext1SrcReg = std::get<0>(MatchInfo);
306
307 // If there is only one source register, use a vector of 1s as the second
308 // source register: dot(x, 1) simply sums the elements of x.
309 Register Ext2SrcReg;
310 if (std::get<1>(MatchInfo) == 0)
311 Ext2SrcReg = Builder.buildConstant(MRI.getType(Ext1SrcReg), 1)
312 ->getOperand(0)
313 .getReg();
314 else
315 Ext2SrcReg = std::get<1>(MatchInfo);
316
317 // Find out how many DOT instructions are needed
318 LLT SrcTy = MRI.getType(Ext1SrcReg);
319 LLT MidTy;
320 unsigned NumOfDotMI;
321 if (SrcTy.getNumElements() % 16 == 0) {
322 NumOfDotMI = SrcTy.getNumElements() / 16;
323 MidTy = LLT::fixed_vector(4, 32);
324 } else if (SrcTy.getNumElements() % 8 == 0) {
325 NumOfDotMI = SrcTy.getNumElements() / 8;
326 MidTy = LLT::fixed_vector(2, 32);
327 } else {
328 llvm_unreachable("Source type number of elements is not multiple of 8");
329 }
330
331 // Handle case where one DOT instruction is needed
332 if (NumOfDotMI == 1) {
333 auto Zeroes = Builder.buildConstant(MidTy, 0)->getOperand(0).getReg();
334 auto Dot = Builder.buildInstr(DotOpcode, {MidTy},
335 {Zeroes, Ext1SrcReg, Ext2SrcReg});
336 Builder.buildVecReduceAdd(MI.getOperand(0), Dot->getOperand(0));
337 } else {
338 // If not, pad the trailing v8i8 chunk with 0s up to v16i8.
339 SmallVector<Register, 4> Ext1UnmergeReg;
340 SmallVector<Register, 4> Ext2UnmergeReg;
341 if (SrcTy.getNumElements() % 16 != 0) {
342 SmallVector<Register> Leftover1;
343 SmallVector<Register> Leftover2;
344
345 // Split the elements into v16i8 and v8i8
346 LLT MainTy = LLT::fixed_vector(16, 8);
347 LLT LeftoverTy1, LeftoverTy2;
348 if ((!extractParts(Ext1SrcReg, MRI.getType(Ext1SrcReg), MainTy,
349 LeftoverTy1, Ext1UnmergeReg, Leftover1, Builder,
350 MRI)) ||
351 (!extractParts(Ext2SrcReg, MRI.getType(Ext2SrcReg), MainTy,
352 LeftoverTy2, Ext2UnmergeReg, Leftover2, Builder,
353 MRI))) {
354 llvm_unreachable("Unable to split this vector properly");
355 }
356
357 // Pad the leftover v8i8 vector with a v8i8 register of 0s.
358 Register v8Zeroes = Builder.buildConstant(LLT::fixed_vector(8, 8), 0)
359 ->getOperand(0)
360 .getReg();
361
362 Ext1UnmergeReg.push_back(
363 Builder
364 .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
365 {Leftover1[0], v8Zeroes})
366 .getReg(0));
367 Ext2UnmergeReg.push_back(
368 Builder
369 .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
370 {Leftover2[0], v8Zeroes})
371 .getReg(0));
372
373 } else {
374 // Unmerge the source vectors to v16i8
375 unsigned SrcNumElts = SrcTy.getNumElements();
376 extractParts(Ext1SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
377 Ext1UnmergeReg, Builder, MRI);
378 extractParts(Ext2SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
379 Ext2UnmergeReg, Builder, MRI);
380 }
381
382 // Build the [US]DOT instructions
383 SmallVector<Register, 2> DotReg;
384 unsigned NumElements = 0;
385 for (unsigned i = 0; i < Ext1UnmergeReg.size(); i++) {
386 LLT ZeroesLLT;
387 // Check whether this part has 16 or 8 elements and size Zeroes accordingly.
388 if (MRI.getType(Ext1UnmergeReg[i]).getNumElements() == 16) {
389 ZeroesLLT = LLT::fixed_vector(4, 32);
390 NumElements += 4;
391 } else {
392 ZeroesLLT = LLT::fixed_vector(2, 32);
393 NumElements += 2;
394 }
395 auto Zeroes = Builder.buildConstant(ZeroesLLT, 0)->getOperand(0).getReg();
396 DotReg.push_back(
397 Builder
398 .buildInstr(DotOpcode, {MRI.getType(Zeroes)},
399 {Zeroes, Ext1UnmergeReg[i], Ext2UnmergeReg[i]})
400 .getReg(0));
401 }
402
403 // Merge the output
404 auto ConcatMI =
405 Builder.buildConcatVectors(LLT::fixed_vector(NumElements, 32), DotReg);
406
407 // Put it through a vector reduction
408 Builder.buildVecReduceAdd(MI.getOperand(0).getReg(),
409 ConcatMI->getOperand(0).getReg());
410 }
411
412 // Erase the dead instructions
413 MI.eraseFromParent();
414}
415
416// Matches {U/S}ADDV(ext(x)) => {U/S}ADDLV(x)
417// Ensure that the type coming from the extend instruction is the right size
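//
// Illustrative sketch (hypothetical registers):
//   %wide:_(<8 x s32>) = G_ZEXT %x:_(<8 x s8>)
//   %sum:_(s32) = G_VECREDUCE_ADD %wide
// becomes a single G_UADDLV of %x, with the scalar result extracted from the
// low lane of the destination vector register.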
418bool matchExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
419 std::pair<Register, bool> &MatchInfo) {
420 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
421 "Expected G_VECREDUCE_ADD Opcode");
422
423 // Check if the last instruction is an extend
424 MachineInstr *ExtMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
425 auto ExtOpc = ExtMI->getOpcode();
426
427 if (ExtOpc == TargetOpcode::G_ZEXT)
428 std::get<1>(MatchInfo) = 0;
429 else if (ExtOpc == TargetOpcode::G_SEXT)
430 std::get<1>(MatchInfo) = 1;
431 else
432 return false;
433
434 // Check if the source register is a valid type
435 Register ExtSrcReg = ExtMI->getOperand(1).getReg();
436 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
437 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
438 if (ExtSrcTy.getScalarSizeInBits() * 2 > DstTy.getScalarSizeInBits())
439 return false;
440 if ((DstTy.getScalarSizeInBits() == 16 &&
441 ExtSrcTy.getNumElements() % 8 == 0 && ExtSrcTy.getNumElements() < 256) ||
442 (DstTy.getScalarSizeInBits() == 32 &&
443 ExtSrcTy.getNumElements() % 4 == 0) ||
444 (DstTy.getScalarSizeInBits() == 64 &&
445 ExtSrcTy.getNumElements() % 4 == 0)) {
446 std::get<0>(MatchInfo) = ExtSrcReg;
447 return true;
448 }
449 return false;
450}
451
452 void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
453 MachineIRBuilder &B, GISelChangeObserver &Observer,
454 std::pair<Register, bool> &MatchInfo) {
455 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
456 "Expected G_VECREDUCE_ADD Opcode");
457
458 unsigned Opc = std::get<1>(MatchInfo) ? AArch64::G_SADDLV : AArch64::G_UADDLV;
459 Register SrcReg = std::get<0>(MatchInfo);
460 Register DstReg = MI.getOperand(0).getReg();
461 LLT SrcTy = MRI.getType(SrcReg);
462 LLT DstTy = MRI.getType(DstReg);
463
464 // If SrcTy has more elements than expected, split them into multiple
465 // instructions and sum the results.
466 LLT MainTy;
467 SmallVector<Register, 1> WorkingRegisters;
468 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
469 unsigned SrcNumElem = SrcTy.getNumElements();
470 if ((SrcScalSize == 8 && SrcNumElem > 16) ||
471 (SrcScalSize == 16 && SrcNumElem > 8) ||
472 (SrcScalSize == 32 && SrcNumElem > 4)) {
473
474 LLT LeftoverTy;
475 SmallVector<Register, 4> LeftoverRegs;
476 if (SrcScalSize == 8)
477 MainTy = LLT::fixed_vector(16, 8);
478 else if (SrcScalSize == 16)
479 MainTy = LLT::fixed_vector(8, 16);
480 else if (SrcScalSize == 32)
481 MainTy = LLT::fixed_vector(4, 32);
482 else
483 llvm_unreachable("Source's Scalar Size not supported");
484
485 // Extract the parts, put each extracted source through U/SADDLV, and
486 // collect the results in a small vector.
487 extractParts(SrcReg, SrcTy, MainTy, LeftoverTy, WorkingRegisters,
488 LeftoverRegs, B, MRI);
489 llvm::append_range(WorkingRegisters, LeftoverRegs);
490 } else {
491 WorkingRegisters.push_back(SrcReg);
492 MainTy = SrcTy;
493 }
494
495 unsigned MidScalarSize = MainTy.getScalarSizeInBits() * 2;
496 LLT MidScalarLLT = LLT::scalar(MidScalarSize);
497 Register ZeroReg = B.buildConstant(LLT::scalar(64), 0).getReg(0);
498 for (unsigned I = 0; I < WorkingRegisters.size(); I++) {
499 // If the number of elements is too small to build an instruction, extend
500 // its size before applying addlv
501 LLT WorkingRegTy = MRI.getType(WorkingRegisters[I]);
502 if ((WorkingRegTy.getScalarSizeInBits() == 8) &&
503 (WorkingRegTy.getNumElements() == 4)) {
504 WorkingRegisters[I] =
505 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
506 : TargetOpcode::G_ZEXT,
507 {LLT::fixed_vector(4, 16)}, {WorkingRegisters[I]})
508 .getReg(0);
509 }
510
511 // Generate the {U/S}ADDLV instruction, whose result element is always twice
512 // the source's scalar size.
513 LLT AddlvTy = MidScalarSize <= 32 ? LLT::fixed_vector(4, 32)
514 : LLT::fixed_vector(2, 64);
515 Register AddlvReg =
516 B.buildInstr(Opc, {AddlvTy}, {WorkingRegisters[I]}).getReg(0);
517
518 // The output from {U/S}ADDLV gets placed in the lowest lane of a v4i32 or
519 // v2i64 register.
520 // i16 and i32 results use v4i32 registers,
521 // i64 results use v2i64 registers.
522 // Therefore we have to extract/truncate the value to the right type.
523 if (MidScalarSize == 32 || MidScalarSize == 64) {
524 WorkingRegisters[I] = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
525 {MidScalarLLT}, {AddlvReg, ZeroReg})
526 .getReg(0);
527 } else {
528 Register ExtractReg = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
529 {LLT::scalar(32)}, {AddlvReg, ZeroReg})
530 .getReg(0);
531 WorkingRegisters[I] =
532 B.buildTrunc({MidScalarLLT}, {ExtractReg}).getReg(0);
533 }
534 }
535
536 Register OutReg;
537 if (WorkingRegisters.size() > 1) {
538 OutReg = B.buildAdd(MidScalarLLT, WorkingRegisters[0], WorkingRegisters[1])
539 .getReg(0);
540 for (unsigned I = 2; I < WorkingRegisters.size(); I++) {
541 OutReg = B.buildAdd(MidScalarLLT, OutReg, WorkingRegisters[I]).getReg(0);
542 }
543 } else {
544 OutReg = WorkingRegisters[0];
545 }
546
547 if (DstTy.getScalarSizeInBits() > MidScalarSize) {
548 // Extend the scalar value if DstTy's scalar size is more than double the
549 // source's scalar size.
550 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
551 : TargetOpcode::G_ZEXT,
552 {DstReg}, {OutReg});
553 } else {
554 B.buildCopy(DstReg, OutReg);
555 }
556
557 MI.eraseFromParent();
558}
559
560 // Pushes ADD/SUB/MUL through extend instructions to decrease the number of
561 // extend instructions at the end, allowing {s|u}addl to be selected sooner:
562 // i32 add(i32 ext i8, i32 ext i8) => i32 ext(i16 add(i16 ext i8, i16 ext i8))
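//
// Illustrative MIR sketch for the vector case this combine actually handles
// (hypothetical registers; see the isVector() check below):
//   %xa:_(<8 x s32>) = G_SEXT %a:_(<8 x s8>)
//   %xb:_(<8 x s32>) = G_SEXT %b:_(<8 x s8>)
//   %d:_(<8 x s32>) = G_ADD %xa, %xb
// becomes
//   %ya:_(<8 x s16>) = G_SEXT %a:_(<8 x s8>)
//   %yb:_(<8 x s16>) = G_SEXT %b:_(<8 x s8>)
//   %s:_(<8 x s16>) = G_ADD %ya, %yb
//   %d:_(<8 x s32>) = G_SEXT %s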
563bool matchPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
564 Register DstReg, Register SrcReg1, Register SrcReg2) {
565 assert((MI.getOpcode() == TargetOpcode::G_ADD ||
566 MI.getOpcode() == TargetOpcode::G_SUB ||
567 MI.getOpcode() == TargetOpcode::G_MUL) &&
568 "Expected a G_ADD, G_SUB or G_MUL instruction\n");
569
570 // Deal with vector types only
571 LLT DstTy = MRI.getType(DstReg);
572 if (!DstTy.isVector())
573 return false;
574
575 // Only combine when the extend widens by more than 2x (8->32, 8->64 or
576 // 16->64) and both sources have the same type.
576 Register ExtDstReg = MI.getOperand(1).getReg();
577 LLT Ext1SrcTy = MRI.getType(SrcReg1);
578 LLT Ext2SrcTy = MRI.getType(SrcReg2);
579 unsigned ExtDstScal = MRI.getType(ExtDstReg).getScalarSizeInBits();
580 unsigned Ext1SrcScal = Ext1SrcTy.getScalarSizeInBits();
581 if (((Ext1SrcScal == 8 && ExtDstScal == 32) ||
582 ((Ext1SrcScal == 8 || Ext1SrcScal == 16) && ExtDstScal == 64)) &&
583 Ext1SrcTy == Ext2SrcTy)
584 return true;
585
586 return false;
587}
588
589void applyPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
590 MachineIRBuilder &B, bool isSExt, Register DstReg,
591 Register SrcReg1, Register SrcReg2) {
592 LLT SrcTy = MRI.getType(SrcReg1);
593 LLT MidTy = SrcTy.changeElementSize(SrcTy.getScalarSizeInBits() * 2);
594 unsigned Opc = isSExt ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
595 Register Ext1Reg = B.buildInstr(Opc, {MidTy}, {SrcReg1}).getReg(0);
596 Register Ext2Reg = B.buildInstr(Opc, {MidTy}, {SrcReg2}).getReg(0);
597 Register AddReg =
598 B.buildInstr(MI.getOpcode(), {MidTy}, {Ext1Reg, Ext2Reg}).getReg(0);
599
600 // G_SUB has to sign-extend the result.
601 // For G_ADD and G_MUL, the outer extend can reuse the opcode of the input
602 // extends (sext of sext, zext of zext), so the original opcode is used for both.
603 if (MI.getOpcode() == TargetOpcode::G_ADD ||
604 MI.getOpcode() == TargetOpcode::G_MUL)
605 B.buildInstr(Opc, {DstReg}, {AddReg});
606 else
607 B.buildSExt(DstReg, AddReg);
608
609 MI.eraseFromParent();
610}
611
612bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
613 const CombinerHelper &Helper,
614 GISelChangeObserver &Observer) {
615 // Try simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if
616 // result is only used in the no-overflow case. It is restricted to cases
617 // where we know that the high-bits of the operands are 0. If there's an
618 // overflow, then the 9th or 17th bit must be set, which can be checked
619 // using TBNZ.
620 //
621 // Change (for UADDOs on 8 and 16 bits):
622 //
623 // %z0 = G_ASSERT_ZEXT _
624 // %op0 = G_TRUNC %z0
625 // %z1 = G_ASSERT_ZEXT _
626 // %op1 = G_TRUNC %z1
627 // %val, %cond = G_UADDO %op0, %op1
628 // G_BRCOND %cond, %error.bb
629 //
630 // error.bb:
631 // (no successors and no uses of %val)
632 //
633 // To:
634 //
635 // %z0 = G_ASSERT_ZEXT _
636 // %z1 = G_ASSERT_ZEXT _
637 // %add = G_ADD %z0, %z1
638 // %val = G_TRUNC %add
639 // %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
640 // %cond = G_ICMP NE, %bit, 0
641 // G_BRCOND %cond, %error.bb
642
643 auto &MRI = *B.getMRI();
644
645 MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
646 MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
647 Register Op0Wide;
648 Register Op1Wide;
649 if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
650 !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
651 return false;
652 LLT WideTy0 = MRI.getType(Op0Wide);
653 LLT WideTy1 = MRI.getType(Op1Wide);
654 Register ResVal = MI.getOperand(0).getReg();
655 LLT OpTy = MRI.getType(ResVal);
656 MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
657 MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);
658
659 unsigned OpTySize = OpTy.getScalarSizeInBits();
660 // First check that the G_TRUNC feeding the G_UADDO are no-ops, because the
661 // inputs have been zero-extended.
662 if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
663 Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
664 OpTySize != Op0WideDef->getOperand(2).getImm() ||
665 OpTySize != Op1WideDef->getOperand(2).getImm())
666 return false;
667
668 // Only scalar UADDO with either 8 or 16 bit operands are handled.
669 if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
670 OpTySize >= WideTy0.getScalarSizeInBits() ||
671 (OpTySize != 8 && OpTySize != 16))
672 return false;
673
674 // The overflow-status result must be used by a branch only.
675 Register ResStatus = MI.getOperand(1).getReg();
676 if (!MRI.hasOneNonDBGUse(ResStatus))
677 return false;
678 MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
679 if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
680 return false;
681
682 // Make sure the computed result is only used in the no-overflow blocks.
683 MachineBasicBlock *CurrentMBB = MI.getParent();
684 MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
685 if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
686 return false;
687 if (any_of(MRI.use_nodbg_instructions(ResVal),
688 [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
689 return &MI != &I &&
690 (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
691 }))
692 return false;
693
694 // Remove the G_UADDO.
695 B.setInstrAndDebugLoc(*MI.getNextNode());
696 MI.eraseFromParent();
697
698 // Emit wide add.
699 Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
700 B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});
701
702 // Emit check of the 9th or 17th bit and update users (the branch). This will
703 // later be folded to TBNZ.
704 Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
705 B.buildAnd(
706 CondBit, AddDst,
707 B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
708 B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
709 B.buildConstant(LLT::scalar(32), 0));
710
711 // Update G_ZEXT users of the result value. Because all uses are in the
712 // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
713 B.buildZExtOrTrunc(ResVal, AddDst);
714 for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
715 Register WideReg;
716 if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
717 auto OldR = U.getParent()->getOperand(0).getReg();
718 Observer.erasingInstr(*U.getParent());
719 U.getParent()->eraseFromParent();
720 Helper.replaceRegWith(MRI, OldR, AddDst);
721 }
722 }
723
724 return true;
725}
726
727class AArch64PreLegalizerCombinerImpl : public Combiner {
728protected:
729 const CombinerHelper Helper;
730 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig;
731 const AArch64Subtarget &STI;
732
733public:
734 AArch64PreLegalizerCombinerImpl(
735 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
736 GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
737 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
738 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
739 const LegalizerInfo *LI);
740
741 static const char *getName() { return "AArch64PreLegalizerCombiner"; }
742
743 bool tryCombineAll(MachineInstr &I) const override;
744
745 bool tryCombineAllImpl(MachineInstr &I) const;
746
747private:
748#define GET_GICOMBINER_CLASS_MEMBERS
749#include "AArch64GenPreLegalizeGICombiner.inc"
750#undef GET_GICOMBINER_CLASS_MEMBERS
751};
752
753#define GET_GICOMBINER_IMPL
754#include "AArch64GenPreLegalizeGICombiner.inc"
755#undef GET_GICOMBINER_IMPL
756
757AArch64PreLegalizerCombinerImpl::AArch64PreLegalizerCombinerImpl(
758 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
759 GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
760 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
761 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
762 const LegalizerInfo *LI)
763 : Combiner(MF, CInfo, TPC, &VT, CSEInfo),
764 Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI),
765 RuleConfig(RuleConfig), STI(STI),
766 #define GET_GICOMBINER_CONSTRUCTOR_INITS
767 #include "AArch64GenPreLegalizeGICombiner.inc"
768 #undef GET_GICOMBINER_CONSTRUCTOR_INITS
769 {
770 }
771
772bool AArch64PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
773 if (tryCombineAllImpl(MI))
774 return true;
775
776 unsigned Opc = MI.getOpcode();
777 switch (Opc) {
778 case TargetOpcode::G_SHUFFLE_VECTOR:
779 return Helper.tryCombineShuffleVector(MI);
780 case TargetOpcode::G_UADDO:
781 return tryToSimplifyUADDO(MI, B, Helper, Observer);
782 case TargetOpcode::G_MEMCPY_INLINE:
783 return Helper.tryEmitMemcpyInline(MI);
784 case TargetOpcode::G_MEMCPY:
785 case TargetOpcode::G_MEMMOVE:
786 case TargetOpcode::G_MEMSET: {
787 // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
788 // heuristics decide.
789 unsigned MaxLen = CInfo.EnableOpt ? 0 : 32;
790 // Try to inline memcpy type calls if optimizations are enabled.
791 if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
792 return true;
793 if (Opc == TargetOpcode::G_MEMSET)
794 return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, CInfo.EnableMinSize);
795 return false;
796 }
797 }
798
799 return false;
800}
801
802// Pass boilerplate
803// ================
804
805class AArch64PreLegalizerCombiner : public MachineFunctionPass {
806public:
807 static char ID;
808
809 AArch64PreLegalizerCombiner();
810
811 StringRef getPassName() const override {
812 return "AArch64PreLegalizerCombiner";
813 }
814
815 bool runOnMachineFunction(MachineFunction &MF) override;
816
817 void getAnalysisUsage(AnalysisUsage &AU) const override;
818
819private:
820 AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
821};
822} // end anonymous namespace
823
824void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
825 AU.addRequired<TargetPassConfig>();
826 AU.setPreservesCFG();
827 getSelectionDAGFallbackAnalysisUsage(AU);
828 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
829 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
830 AU.addRequired<MachineDominatorTreeWrapperPass>();
831 AU.addPreserved<MachineDominatorTreeWrapperPass>();
832 AU.addRequired<GISelCSEAnalysisWrapperPass>();
833 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
834 MachineFunctionPass::getAnalysisUsage(AU);
835 }
836
837AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
838 : MachineFunctionPass(ID) {
839 if (!RuleConfig.parseCommandLineOption())
840 report_fatal_error("Invalid rule identifier");
841}
842
843bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
844 if (MF.getProperties().hasFailedISel())
845 return false;
846 auto &TPC = getAnalysis<TargetPassConfig>();
847
848 // Enable CSE.
849 GISelCSEAnalysisWrapper &Wrapper =
850 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
851 auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());
852
853 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
854 const auto *LI = ST.getLegalizerInfo();
855
856 const Function &F = MF.getFunction();
857 bool EnableOpt =
858 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
859 GISelValueTracking *VT =
860 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
861 MachineDominatorTree *MDT =
862 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
863 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
864 /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
865 F.hasMinSize());
866 // Disable fixed-point iteration to reduce compile-time
867 CInfo.MaxIterations = 1;
868 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
869 // This is the first Combiner, so the input IR might contain dead
870 // instructions.
871 CInfo.EnableFullDCE = true;
872 AArch64PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *VT, CSEInfo,
873 RuleConfig, ST, MDT, LI);
874 return Impl.combineMachineInstrs();
875}
876
877char AArch64PreLegalizerCombiner::ID = 0;
878INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
879 "Combine AArch64 machine instrs before legalization",
880 false, false)
881 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
882 INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)
883 INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
884 INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
885 "Combine AArch64 machine instrs before legalization", false,
886 false)
887
888 namespace llvm {
889 FunctionPass *createAArch64PreLegalizerCombiner() {
890 return new AArch64PreLegalizerCombiner();
891}
892} // end namespace llvm