1//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64TargetMachine.h"
16#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
17#include "llvm/CodeGen/GlobalISel/Combiner.h"
18#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
19#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
20#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
21#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
22#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24#include "llvm/CodeGen/GlobalISel/Utils.h"
25#include "llvm/CodeGen/MachineDominators.h"
26#include "llvm/CodeGen/MachineFunction.h"
27#include "llvm/CodeGen/MachineFunctionPass.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/CodeGen/TargetPassConfig.h"
30#include "llvm/IR/Instructions.h"
31
32#define GET_GICOMBINER_DEPS
33#include "AArch64GenPreLegalizeGICombiner.inc"
34#undef GET_GICOMBINER_DEPS
35
36#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
37
38using namespace llvm;
39using namespace MIPatternMatch;
40
41namespace {
42
43#define GET_GICOMBINER_TYPES
44#include "AArch64GenPreLegalizeGICombiner.inc"
45#undef GET_GICOMBINER_TYPES
46
47/// Return true if a G_FCONSTANT instruction is known to be better-represented
48/// as a G_CONSTANT.
49bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
50 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
51 Register DstReg = MI.getOperand(0).getReg();
52 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
53 if (DstSize != 32 && DstSize != 64)
54 return false;
55
56 // When we're storing a value, it doesn't matter what register bank it's on.
57 // Since not all floating point constants can be materialized using a fmov,
58 // it makes more sense to just use a GPR.
59 return all_of(MRI.use_nodbg_instructions(DstReg),
60 [](const MachineInstr &Use) { return Use.mayStore(); });
61}
62
63/// Change a G_FCONSTANT into a G_CONSTANT.
64void applyFConstantToConstant(MachineInstr &MI) {
65 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
66 MachineIRBuilder MIB(MI);
67 const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
68 MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
69 MI.eraseFromParent();
70}
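// Illustrative example (not part of the original comments): a stored float
// constant such as
//   %f:_(s32) = G_FCONSTANT float 1.000000e+00
//   G_STORE %f(s32), %ptr(p0)
// becomes
//   %c:_(s32) = G_CONSTANT i32 1065353216
//   G_STORE %c(s32), %ptr(p0)
// so the value can be materialized on the GPR bank instead of needing an FMOV.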
71
72/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
73/// are sign bits. In this case, we can transform the G_ICMP to directly compare
74/// the wide value with a zero.
75bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
76 GISelKnownBits *KB, Register &MatchInfo) {
77 assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);
78
79 auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
80 if (!ICmpInst::isEquality(Pred))
81 return false;
82
83 Register LHS = MI.getOperand(2).getReg();
84 LLT LHSTy = MRI.getType(LHS);
85 if (!LHSTy.isScalar())
86 return false;
87
88 Register RHS = MI.getOperand(3).getReg();
89 Register WideReg;
90
91 if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
92 !mi_match(RHS, MRI, m_SpecificICst(0)))
93 return false;
94
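// The bits dropped by the truncate must all be copies of the sign bit, e.g.
// for an s64 source truncated to s32 we need more than 32 known sign bits;
// then an eq/ne compare of the wide value against zero gives the same result
// as comparing the truncated value.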
95 LLT WideTy = MRI.getType(WideReg);
96 if (KB->computeNumSignBits(WideReg) <=
97 WideTy.getSizeInBits() - LHSTy.getSizeInBits())
98 return false;
99
100 MatchInfo = WideReg;
101 return true;
102}
103
104void applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
105 MachineIRBuilder &Builder,
106 GISelChangeObserver &Observer, Register &WideReg) {
107 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
108
109 LLT WideTy = MRI.getType(WideReg);
110 // We're going to directly use the wide register as the LHS, and then use an
111 // equivalent size zero for RHS.
112 Builder.setInstrAndDebugLoc(MI);
113 auto WideZero = Builder.buildConstant(WideTy, 0);
114 Observer.changingInstr(MI);
115 MI.getOperand(2).setReg(WideReg);
116 MI.getOperand(3).setReg(WideZero.getReg(0));
117 Observer.changedInstr(MI);
118}
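// Illustrative example (not part of the original comments): given
//   %t:_(s32) = G_TRUNC %x(s64)
//   %zero:_(s32) = G_CONSTANT i32 0
//   %c:_(s1) = G_ICMP intpred(eq), %t(s32), %zero
// where the top 33 bits of %x are known to be sign bits, the G_ICMP is
// rewritten to compare %x(s64) directly against a 64-bit zero and the G_TRUNC
// becomes dead.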
119
120/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
121///
122/// e.g.
123///
124/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
125bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
126 std::pair<uint64_t, uint64_t> &MatchInfo) {
127 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
128 MachineFunction &MF = *MI.getMF();
129 auto &GlobalOp = MI.getOperand(1);
130 auto *GV = GlobalOp.getGlobal();
131 if (GV->isThreadLocal())
132 return false;
133
134 // Don't allow anything that could represent offsets etc.
135 if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
136 GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
137 return false;
138
139 // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
140 //
141 // %g = G_GLOBAL_VALUE @x
142 // %ptr1 = G_PTR_ADD %g, cst1
143 // %ptr2 = G_PTR_ADD %g, cst2
144 // ...
145 // %ptrN = G_PTR_ADD %g, cstN
146 //
147 // Identify the *smallest* constant. We want to be able to form this:
148 //
149 // %offset_g = G_GLOBAL_VALUE @x + min_cst
150 // %g = G_PTR_ADD %offset_g, -min_cst
151 // %ptr1 = G_PTR_ADD %g, cst1
152 // ...
153 Register Dst = MI.getOperand(0).getReg();
154 uint64_t MinOffset = -1ull;
155 for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
156 if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
157 return false;
158 auto Cst = getIConstantVRegValWithLookThrough(
159 UseInstr.getOperand(2).getReg(), MRI);
160 if (!Cst)
161 return false;
162 MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
163 }
164
165 // Require that the new offset is larger than the existing one to avoid
166 // infinite loops.
167 uint64_t CurrOffset = GlobalOp.getOffset();
168 uint64_t NewOffset = MinOffset + CurrOffset;
169 if (NewOffset <= CurrOffset)
170 return false;
171
172 // Check whether folding this offset is legal. It must not go out of bounds of
173 // the referenced object to avoid violating the code model, and must be
174 // smaller than 2^20 because this is the largest offset expressible in all
175 // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
176 // stores an immediate signed 21 bit offset.)
177 //
178 // This check also prevents us from folding negative offsets, which will end
179 // up being treated in the same way as large positive ones. They could also
180 // cause code model violations, and aren't really common enough to matter.
181 if (NewOffset >= (1 << 20))
182 return false;
183
184 Type *T = GV->getValueType();
185 if (!T->isSized() ||
186 NewOffset > GV->getDataLayout().getTypeAllocSize(T))
187 return false;
188 MatchInfo = std::make_pair(NewOffset, MinOffset);
189 return true;
190}
191
192void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
193 MachineIRBuilder &B, GISelChangeObserver &Observer,
194 std::pair<uint64_t, uint64_t> &MatchInfo) {
195 // Change:
196 //
197 // %g = G_GLOBAL_VALUE @x
198 // %ptr1 = G_PTR_ADD %g, cst1
199 // %ptr2 = G_PTR_ADD %g, cst2
200 // ...
201 // %ptrN = G_PTR_ADD %g, cstN
202 //
203 // To:
204 //
205 // %offset_g = G_GLOBAL_VALUE @x + min_cst
206 // %g = G_PTR_ADD %offset_g, -min_cst
207 // %ptr1 = G_PTR_ADD %g, cst1
208 // ...
209 // %ptrN = G_PTR_ADD %g, cstN
210 //
211 // Then, the original G_PTR_ADDs should be folded later on so that they look
212 // like this:
213 //
214 // %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
215 uint64_t Offset, MinOffset;
216 std::tie(Offset, MinOffset) = MatchInfo;
217 B.setInstrAndDebugLoc(*std::next(MI.getIterator()));
218 Observer.changingInstr(MI);
219 auto &GlobalOp = MI.getOperand(1);
220 auto *GV = GlobalOp.getGlobal();
221 GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
222 Register Dst = MI.getOperand(0).getReg();
223 Register NewGVDst = MRI.cloneVirtualRegister(Dst);
224 MI.getOperand(0).setReg(NewGVDst);
225 Observer.changedInstr(MI);
226 B.buildPtrAdd(
227 Dst, NewGVDst,
228 B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
229}
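// Illustrative example (not part of the original comments): with uses at
// offsets 8 and 24,
//   %g = G_GLOBAL_VALUE @x
//   %p1 = G_PTR_ADD %g, 8
//   %p2 = G_PTR_ADD %g, 24
// MinOffset is 8, so this rewrites to
//   %og = G_GLOBAL_VALUE @x + 8
//   %g  = G_PTR_ADD %og, -8
// and later ptr-add folding is expected to turn %p1 and %p2 into offsets of 0
// and 16 from %og.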
230
231// Combines vecreduce_add(mul(ext(x), ext(y))) -> vecreduce_add(udot(x, y))
232// Or vecreduce_add(ext(x)) -> vecreduce_add(udot(x, 1))
233// Similar to performVecReduceAddCombine in SelectionDAG
234bool matchExtAddvToUdotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
235 const AArch64Subtarget &STI,
236 std::tuple<Register, Register, bool> &MatchInfo) {
237 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
238 "Expected a G_VECREDUCE_ADD instruction");
239 assert(STI.hasDotProd() && "Target should have Dot Product feature");
240
241 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
242 Register DstReg = MI.getOperand(0).getReg();
243 Register MidReg = I1->getOperand(0).getReg();
244 LLT DstTy = MRI.getType(DstReg);
245 LLT MidTy = MRI.getType(MidReg);
246 if (DstTy.getScalarSizeInBits() != 32 || MidTy.getScalarSizeInBits() != 32)
247 return false;
248
249 LLT SrcTy;
250 auto I1Opc = I1->getOpcode();
251 if (I1Opc == TargetOpcode::G_MUL) {
252 // If the result of the G_MUL has more than one use, then there is no point
253 // in creating a udot instruction.
254 if (!MRI.hasOneNonDBGUse(MidReg))
255 return false;
256
257 MachineInstr *ExtMI1 =
258 getDefIgnoringCopies(I1->getOperand(1).getReg(), MRI);
259 MachineInstr *ExtMI2 =
260 getDefIgnoringCopies(I1->getOperand(2).getReg(), MRI);
261 LLT Ext1DstTy = MRI.getType(ExtMI1->getOperand(0).getReg());
262 LLT Ext2DstTy = MRI.getType(ExtMI2->getOperand(0).getReg());
263
264 if (ExtMI1->getOpcode() != ExtMI2->getOpcode() || Ext1DstTy != Ext2DstTy)
265 return false;
266 I1Opc = ExtMI1->getOpcode();
267 SrcTy = MRI.getType(ExtMI1->getOperand(1).getReg());
268 std::get<0>(MatchInfo) = ExtMI1->getOperand(1).getReg();
269 std::get<1>(MatchInfo) = ExtMI2->getOperand(1).getReg();
270 } else {
271 SrcTy = MRI.getType(I1->getOperand(1).getReg());
272 std::get<0>(MatchInfo) = I1->getOperand(1).getReg();
273 std::get<1>(MatchInfo) = 0;
274 }
275
276 if (I1Opc == TargetOpcode::G_ZEXT)
277 std::get<2>(MatchInfo) = 0;
278 else if (I1Opc == TargetOpcode::G_SEXT)
279 std::get<2>(MatchInfo) = 1;
280 else
281 return false;
282
283 if (SrcTy.getScalarSizeInBits() != 8 || SrcTy.getNumElements() % 8 != 0)
284 return false;
285
286 return true;
287}
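// Illustrative example (not part of the original comments): for a v16s8
// zero-extended multiply-accumulate,
//   %xe:_(<16 x s32>) = G_ZEXT %x(<16 x s8>)
//   %ye:_(<16 x s32>) = G_ZEXT %y(<16 x s8>)
//   %m:_(<16 x s32>) = G_MUL %xe, %ye
//   %r:_(s32) = G_VECREDUCE_ADD %m
// MatchInfo records (%x, %y, /*IsSigned=*/false) and the apply below emits a
// single G_UDOT into a zero <4 x s32> accumulator, then reduces that
// accumulator instead.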
288
289void applyExtAddvToUdotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
290 MachineIRBuilder &Builder,
291 GISelChangeObserver &Observer,
292 const AArch64Subtarget &STI,
293 std::tuple<Register, Register, bool> &MatchInfo) {
294 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
295 "Expected a G_VECREDUCE_ADD instruction");
296 assert(STI.hasDotProd() && "Target should have Dot Product feature");
297
298 // Initialise the variables
299 unsigned DotOpcode =
300 std::get<2>(MatchInfo) ? AArch64::G_SDOT : AArch64::G_UDOT;
301 Register Ext1SrcReg = std::get<0>(MatchInfo);
302
303 // If there is only one source register, use a vector of 1s as the second
304 // source so the dot product reduces to a plain sum of the elements.
305 Register Ext2SrcReg;
306 if (std::get<1>(MatchInfo) == 0)
307 Ext2SrcReg = Builder.buildConstant(MRI.getType(Ext1SrcReg), 1)
308 ->getOperand(0)
309 .getReg();
310 else
311 Ext2SrcReg = std::get<1>(MatchInfo);
312
313 // Find out how many DOT instructions are needed
314 LLT SrcTy = MRI.getType(Ext1SrcReg);
315 LLT MidTy;
316 unsigned NumOfDotMI;
317 if (SrcTy.getNumElements() % 16 == 0) {
318 NumOfDotMI = SrcTy.getNumElements() / 16;
319 MidTy = LLT::fixed_vector(4, 32);
320 } else if (SrcTy.getNumElements() % 8 == 0) {
321 NumOfDotMI = SrcTy.getNumElements() / 8;
322 MidTy = LLT::fixed_vector(2, 32);
323 } else {
324 llvm_unreachable("Source type number of elements is not multiple of 8");
325 }
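// For example, a v16s8 source is handled with a single <4 x s32> dot product,
// while a v24s8 source takes the multi-dot path below: it is split into one
// v16s8 part plus a v8s8 leftover that is padded with zeroes up to v16s8.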
326
327 // Handle case where one DOT instruction is needed
328 if (NumOfDotMI == 1) {
329 auto Zeroes = Builder.buildConstant(MidTy, 0)->getOperand(0).getReg();
330 auto Dot = Builder.buildInstr(DotOpcode, {MidTy},
331 {Zeroes, Ext1SrcReg, Ext2SrcReg});
332 Builder.buildVecReduceAdd(MI.getOperand(0), Dot->getOperand(0));
333 } else {
334 // If not, pad the leftover v8i8 part with 0s up to a v16i8.
335 SmallVector<Register, 4> Ext1UnmergeReg;
336 SmallVector<Register, 4> Ext2UnmergeReg;
337 if (SrcTy.getNumElements() % 16 != 0) {
338 SmallVector<Register> Leftover1;
339 SmallVector<Register> Leftover2;
340
341 // Split the elements into v16i8 and v8i8
342 LLT MainTy = LLT::fixed_vector(16, 8);
343 LLT LeftoverTy1, LeftoverTy2;
344 if ((!extractParts(Ext1SrcReg, MRI.getType(Ext1SrcReg), MainTy,
345 LeftoverTy1, Ext1UnmergeReg, Leftover1, Builder,
346 MRI)) ||
347 (!extractParts(Ext2SrcReg, MRI.getType(Ext2SrcReg), MainTy,
348 LeftoverTy2, Ext2UnmergeReg, Leftover2, Builder,
349 MRI))) {
350 llvm_unreachable("Unable to split this vector properly");
351 }
352
353 // Pad the leftover v8i8 vector with register of 0s of type v8i8
354 Register v8Zeroes = Builder.buildConstant(LLT::fixed_vector(8, 8), 0)
355 ->getOperand(0)
356 .getReg();
357
358 Ext1UnmergeReg.push_back(
359 Builder
360 .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
361 {Leftover1[0], v8Zeroes})
362 .getReg(0));
363 Ext2UnmergeReg.push_back(
364 Builder
365 .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
366 {Leftover2[0], v8Zeroes})
367 .getReg(0));
368
369 } else {
370 // Unmerge the source vectors to v16i8
371 unsigned SrcNumElts = SrcTy.getNumElements();
372 extractParts(Ext1SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
373 Ext1UnmergeReg, Builder, MRI);
374 extractParts(Ext2SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
375 Ext2UnmergeReg, Builder, MRI);
376 }
377
378 // Build the UDOT instructions
379 SmallVector<Register, 2> DotReg;
380 unsigned NumElements = 0;
381 for (unsigned i = 0; i < Ext1UnmergeReg.size(); i++) {
382 LLT ZeroesLLT;
383 // Check if it is 16 or 8 elements. Set Zeroes to the according size
384 if (MRI.getType(Ext1UnmergeReg[i]).getNumElements() == 16) {
385 ZeroesLLT = LLT::fixed_vector(4, 32);
386 NumElements += 4;
387 } else {
388 ZeroesLLT = LLT::fixed_vector(2, 32);
389 NumElements += 2;
390 }
391 auto Zeroes = Builder.buildConstant(ZeroesLLT, 0)->getOperand(0).getReg();
392 DotReg.push_back(
393 Builder
394 .buildInstr(DotOpcode, {MRI.getType(Zeroes)},
395 {Zeroes, Ext1UnmergeReg[i], Ext2UnmergeReg[i]})
396 .getReg(0));
397 }
398
399 // Merge the output
400 auto ConcatMI =
401 Builder.buildConcatVectors(LLT::fixed_vector(NumElements, 32), DotReg);
402
403 // Put it through a vector reduction
404 Builder.buildVecReduceAdd(MI.getOperand(0).getReg(),
405 ConcatMI->getOperand(0).getReg());
406 }
407
408 // Erase the dead instructions
409 MI.eraseFromParent();
410}
411
412// Matches {U/S}ADDV(ext(x)) => {U/S}ADDLV(x)
413// Ensure that the type coming from the extend instruction is the right size
414bool matchExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
415 std::pair<Register, bool> &MatchInfo) {
416 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
417 "Expected G_VECREDUCE_ADD Opcode");
418
419 // Check if the last instruction is an extend
420 MachineInstr *ExtMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
421 auto ExtOpc = ExtMI->getOpcode();
422
423 if (ExtOpc == TargetOpcode::G_ZEXT)
424 std::get<1>(MatchInfo) = 0;
425 else if (ExtOpc == TargetOpcode::G_SEXT)
426 std::get<1>(MatchInfo) = 1;
427 else
428 return false;
429
430 // Check if the source register is a valid type
431 Register ExtSrcReg = ExtMI->getOperand(1).getReg();
432 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
433 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
434 if ((DstTy.getScalarSizeInBits() == 16 &&
435 ExtSrcTy.getNumElements() % 8 == 0 && ExtSrcTy.getNumElements() < 256) ||
436 (DstTy.getScalarSizeInBits() == 32 &&
437 ExtSrcTy.getNumElements() % 4 == 0) ||
438 (DstTy.getScalarSizeInBits() == 64 &&
439 ExtSrcTy.getNumElements() % 4 == 0)) {
440 std::get<0>(MatchInfo) = ExtSrcReg;
441 return true;
442 }
443 return false;
444}
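// Illustrative example (not part of the original comments):
//   %e:_(<8 x s16>) = G_ZEXT %x(<8 x s8>)
//   %r:_(s16) = G_VECREDUCE_ADD %e
// matches with MatchInfo = (%x, /*IsSigned=*/false); the apply below then
// emits AArch64::G_UADDLV on %x, extracts lane 0 of its <4 x s32> result and
// truncates it back to s16.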
445
446void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
447 MachineIRBuilder &B, GISelChangeObserver &Observer,
448 std::pair<Register, bool> &MatchInfo) {
449 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
450 "Expected G_VECREDUCE_ADD Opcode");
451
452 unsigned Opc = std::get<1>(MatchInfo) ? AArch64::G_SADDLV : AArch64::G_UADDLV;
453 Register SrcReg = std::get<0>(MatchInfo);
454 Register DstReg = MI.getOperand(0).getReg();
455 LLT SrcTy = MRI.getType(SrcReg);
456 LLT DstTy = MRI.getType(DstReg);
457
458 // If SrcTy has more elements than expected, split them into multiple
459 // instructions and sum the results.
460 LLT MainTy;
461 SmallVector<Register, 1> WorkingRegisters;
462 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
463 unsigned SrcNumElem = SrcTy.getNumElements();
464 if ((SrcScalSize == 8 && SrcNumElem > 16) ||
465 (SrcScalSize == 16 && SrcNumElem > 8) ||
466 (SrcScalSize == 32 && SrcNumElem > 4)) {
467
468 LLT LeftoverTy;
469 SmallVector<Register, 4> LeftoverRegs;
470 if (SrcScalSize == 8)
471 MainTy = LLT::fixed_vector(16, 8);
472 else if (SrcScalSize == 16)
473 MainTy = LLT::fixed_vector(8, 16);
474 else if (SrcScalSize == 32)
475 MainTy = LLT::fixed_vector(4, 32);
476 else
477 llvm_unreachable("Source's Scalar Size not supported");
478
479 // Extract the parts, put each extracted source through U/SADDLV, and
480 // collect the values in a small vector.
481 extractParts(SrcReg, SrcTy, MainTy, LeftoverTy, WorkingRegisters,
482 LeftoverRegs, B, MRI);
483 for (unsigned I = 0; I < LeftoverRegs.size(); I++) {
484 WorkingRegisters.push_back(LeftoverRegs[I]);
485 }
486 } else {
487 WorkingRegisters.push_back(SrcReg);
488 MainTy = SrcTy;
489 }
490
491 unsigned MidScalarSize = MainTy.getScalarSizeInBits() * 2;
492 LLT MidScalarLLT = LLT::scalar(MidScalarSize);
493 Register zeroReg = B.buildConstant(LLT::scalar(64), 0).getReg(0);
494 for (unsigned I = 0; I < WorkingRegisters.size(); I++) {
495 // If the number of elements is too small to build an instruction, extend
496 // its size before applying addlv
497 LLT WorkingRegTy = MRI.getType(WorkingRegisters[I]);
498 if ((WorkingRegTy.getScalarSizeInBits() == 8) &&
499 (WorkingRegTy.getNumElements() == 4)) {
500 WorkingRegisters[I] =
501 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
502 : TargetOpcode::G_ZEXT,
503 {LLT::fixed_vector(4, 16)}, {WorkingRegisters[I]})
504 .getReg(0);
505 }
506
507 // Generate the {U/S}ADDLV instruction, whose result element is always
508 // double the source's scalar size.
509 LLT addlvTy = MidScalarSize <= 32 ? LLT::fixed_vector(4, 32)
510 : LLT::fixed_vector(2, 64);
511 Register addlvReg =
512 B.buildInstr(Opc, {addlvTy}, {WorkingRegisters[I]}).getReg(0);
513
514 // The output from {U/S}ADDLV is placed in the lowest lane of a v4i32 or
515 // v2i64 register:
516 // i16 and i32 results use a v4i32 register,
517 // i64 results use a v2i64 register.
518 // Therefore we have to extract/truncate the value to the right type.
519 if (MidScalarSize == 32 || MidScalarSize == 64) {
520 WorkingRegisters[I] = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
521 {MidScalarLLT}, {addlvReg, zeroReg})
522 .getReg(0);
523 } else {
524 Register extractReg = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
525 {LLT::scalar(32)}, {addlvReg, zeroReg})
526 .getReg(0);
527 WorkingRegisters[I] =
528 B.buildTrunc({MidScalarLLT}, {extractReg}).getReg(0);
529 }
530 }
531
532 Register outReg;
533 if (WorkingRegisters.size() > 1) {
534 outReg = B.buildAdd(MidScalarLLT, WorkingRegisters[0], WorkingRegisters[1])
535 .getReg(0);
536 for (unsigned I = 2; I < WorkingRegisters.size(); I++) {
537 outReg = B.buildAdd(MidScalarLLT, outReg, WorkingRegisters[I]).getReg(0);
538 }
539 } else {
540 outReg = WorkingRegisters[0];
541 }
542
543 if (DstTy.getScalarSizeInBits() > MidScalarSize) {
544 // The destination scalar is wider than double the source's scalar size,
545 // so extend the {U/S}ADDLV result the rest of the way.
546 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
547 : TargetOpcode::G_ZEXT,
548 {DstReg}, {outReg});
549 } else {
550 B.buildCopy(DstReg, outReg);
551 }
552
553 MI.eraseFromParent();
554}
555
556// Pushes ADD/SUB through extend instructions to decrease the number of extend
557// instructions at the end, by allowing selection of {s|u}addl sooner.
558
559// i32 add(i32 ext i8, i32 ext i8) => i32 ext(i16 add(i16 ext i8, i16 ext i8))
560bool matchPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
561 Register DstReg, Register SrcReg1, Register SrcReg2) {
562 assert((MI.getOpcode() == TargetOpcode::G_ADD ||
563 MI.getOpcode() == TargetOpcode::G_SUB) &&
564 "Expected a G_ADD or G_SUB instruction\n");
565
566 // Deal with vector types only
567 LLT DstTy = MRI.getType(DstReg);
568 if (!DstTy.isVector())
569 return false;
570
571 // Return true only if the G_{S|Z}EXT destination is more than 2x the source.
572 Register ExtDstReg = MI.getOperand(1).getReg();
573 LLT Ext1SrcTy = MRI.getType(SrcReg1);
574 LLT Ext2SrcTy = MRI.getType(SrcReg2);
575 unsigned ExtDstScal = MRI.getType(ExtDstReg).getScalarSizeInBits();
576 unsigned Ext1SrcScal = Ext1SrcTy.getScalarSizeInBits();
577 if (((Ext1SrcScal == 8 && ExtDstScal == 32) ||
578 ((Ext1SrcScal == 8 || Ext1SrcScal == 16) && ExtDstScal == 64)) &&
579 Ext1SrcTy == Ext2SrcTy)
580 return true;
581
582 return false;
583}
584
585void applyPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
586 MachineIRBuilder &B, bool isSExt, Register DstReg,
587 Register SrcReg1, Register SrcReg2) {
588 LLT SrcTy = MRI.getType(SrcReg1);
589 LLT MidTy = SrcTy.changeElementSize(SrcTy.getScalarSizeInBits() * 2);
590 unsigned Opc = isSExt ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
591 Register Ext1Reg = B.buildInstr(Opc, {MidTy}, {SrcReg1}).getReg(0);
592 Register Ext2Reg = B.buildInstr(Opc, {MidTy}, {SrcReg2}).getReg(0);
593 Register AddReg =
594 B.buildInstr(MI.getOpcode(), {MidTy}, {Ext1Reg, Ext2Reg}).getReg(0);
595
596 // G_SUB has to sign-extend the result.
597 // G_ADD needs to sext from sext and can either sext or zext from zext, so
598 // the original extend opcode is reused.
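// For example, with zero-extended i8 lanes 0 and 1, the narrow i16
// subtraction yields 0xFFFF (-1); only a sign extension of that to i32
// reproduces the original wide result of -1, so G_SUB always sign-extends.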
599 if (MI.getOpcode() == TargetOpcode::G_ADD)
600 B.buildInstr(Opc, {DstReg}, {AddReg});
601 else
602 B.buildSExt(DstReg, AddReg);
603
604 MI.eraseFromParent();
605}
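// Illustrative example (not part of the original comments): for vectors,
//   %a:_(<8 x s32>) = G_SEXT %x(<8 x s8>)
//   %b:_(<8 x s32>) = G_SEXT %y(<8 x s8>)
//   %s:_(<8 x s32>) = G_ADD %a, %b
// becomes
//   %a16:_(<8 x s16>) = G_SEXT %x(<8 x s8>)
//   %b16:_(<8 x s16>) = G_SEXT %y(<8 x s8>)
//   %s16:_(<8 x s16>) = G_ADD %a16, %b16
//   %s:_(<8 x s32>) = G_SEXT %s16
// which lets instruction selection form saddl/saddl2 earlier.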
606
607bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
608 const CombinerHelper &Helper,
609 GISelChangeObserver &Observer) {
610 // Try to simplify G_UADDO with 8 or 16 bit operands to a wide G_ADD and TBNZ
611 // if the result is only used in the no-overflow case. It is restricted to cases
612 // where we know that the high-bits of the operands are 0. If there's an
613 // overflow, then the 9th or 17th bit must be set, which can be checked
614 // using TBNZ.
615 //
616 // Change (for UADDOs on 8 and 16 bits):
617 //
618 // %z0 = G_ASSERT_ZEXT _
619 // %op0 = G_TRUNC %z0
620 // %z1 = G_ASSERT_ZEXT _
621 // %op1 = G_TRUNC %z1
622 // %val, %cond = G_UADDO %op0, %op1
623 // G_BRCOND %cond, %error.bb
624 //
625 // error.bb:
626 // (no successors and no uses of %val)
627 //
628 // To:
629 //
630 // %z0 = G_ASSERT_ZEXT _
631 // %z1 = G_ASSERT_ZEXT _
632 // %add = G_ADD %z0, %z1
633 // %val = G_TRUNC %add
634 // %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
635 // %cond = G_ICMP NE, %bit, 0
636 // G_BRCOND %cond, %error.bb
637
638 auto &MRI = *B.getMRI();
639
640 MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
641 MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
642 Register Op0Wide;
643 Register Op1Wide;
644 if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
645 !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
646 return false;
647 LLT WideTy0 = MRI.getType(Op0Wide);
648 LLT WideTy1 = MRI.getType(Op1Wide);
649 Register ResVal = MI.getOperand(0).getReg();
650 LLT OpTy = MRI.getType(ResVal);
651 MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
652 MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);
653
654 unsigned OpTySize = OpTy.getScalarSizeInBits();
655 // First check that the G_TRUNCs feeding the G_UADDO are no-ops, because the
656 // inputs have been zero-extended.
657 if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
658 Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
659 OpTySize != Op0WideDef->getOperand(2).getImm() ||
660 OpTySize != Op1WideDef->getOperand(2).getImm())
661 return false;
662
663 // Only scalar UADDO with either 8 or 16 bit operands are handled.
664 if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
665 OpTySize >= WideTy0.getScalarSizeInBits() ||
666 (OpTySize != 8 && OpTySize != 16))
667 return false;
668
669 // The overflow-status result must be used by a branch only.
670 Register ResStatus = MI.getOperand(1).getReg();
671 if (!MRI.hasOneNonDBGUse(ResStatus))
672 return false;
673 MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
674 if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
675 return false;
676
677 // Make sure the computed result is only used in the no-overflow blocks.
678 MachineBasicBlock *CurrentMBB = MI.getParent();
679 MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
680 if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
681 return false;
682 if (any_of(MRI.use_nodbg_instructions(ResVal),
683 [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
684 return &MI != &I &&
685 (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
686 }))
687 return false;
688
689 // Remove the G_UADDO.
690 B.setInstrAndDebugLoc(*MI.getNextNode());
691 MI.eraseFromParent();
692
693 // Emit wide add.
694 Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
695 B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});
696
697 // Emit check of the 9th or 17th bit and update users (the branch). This will
698 // later be folded to TBNZ.
699 Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
700 B.buildAnd(
701 CondBit, AddDst,
702 B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
703 B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
704 B.buildConstant(LLT::scalar(32), 0));
705
706 // Update ZEXT users of the result value. Because all uses are in the
707 // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
708 B.buildZExtOrTrunc(ResVal, AddDst);
709 for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
710 Register WideReg;
711 if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
712 auto OldR = U.getParent()->getOperand(0).getReg();
713 Observer.erasingInstr(*U.getParent());
714 U.getParent()->eraseFromParent();
715 Helper.replaceRegWith(MRI, OldR, AddDst);
716 }
717 }
718
719 return true;
720}
721
722class AArch64PreLegalizerCombinerImpl : public Combiner {
723protected:
724 const CombinerHelper Helper;
725 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig;
726 const AArch64Subtarget &STI;
727
728public:
729 AArch64PreLegalizerCombinerImpl(
730 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
731 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
732 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
733 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
734 const LegalizerInfo *LI);
735
736 static const char *getName() { return "AArch64PreLegalizerCombiner"; }
737
738 bool tryCombineAll(MachineInstr &I) const override;
739
740 bool tryCombineAllImpl(MachineInstr &I) const;
741
742private:
743#define GET_GICOMBINER_CLASS_MEMBERS
744#include "AArch64GenPreLegalizeGICombiner.inc"
745#undef GET_GICOMBINER_CLASS_MEMBERS
746};
747
748#define GET_GICOMBINER_IMPL
749#include "AArch64GenPreLegalizeGICombiner.inc"
750#undef GET_GICOMBINER_IMPL
751
752AArch64PreLegalizerCombinerImpl::AArch64PreLegalizerCombinerImpl(
753 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
754 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
755 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
756 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
757 const LegalizerInfo *LI)
758 : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
759 Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
760 RuleConfig(RuleConfig), STI(STI),
761#define GET_GICOMBINER_CONSTRUCTOR_INITS
762#include "AArch64GenPreLegalizeGICombiner.inc"
763#undef GET_GICOMBINER_CONSTRUCTOR_INITS
764{
765}
766
767bool AArch64PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
768 if (tryCombineAllImpl(MI))
769 return true;
770
771 unsigned Opc = MI.getOpcode();
772 switch (Opc) {
773 case TargetOpcode::G_SHUFFLE_VECTOR:
774 return Helper.tryCombineShuffleVector(MI);
775 case TargetOpcode::G_UADDO:
776 return tryToSimplifyUADDO(MI, B, Helper, Observer);
777 case TargetOpcode::G_MEMCPY_INLINE:
778 return Helper.tryEmitMemcpyInline(MI);
779 case TargetOpcode::G_MEMCPY:
780 case TargetOpcode::G_MEMMOVE:
781 case TargetOpcode::G_MEMSET: {
782 // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
783 // heuristics decide.
784 unsigned MaxLen = CInfo.EnableOpt ? 0 : 32;
785 // Try to inline memcpy type calls if optimizations are enabled.
786 if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
787 return true;
788 if (Opc == TargetOpcode::G_MEMSET)
789 return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, CInfo.EnableMinSize);
790 return false;
791 }
792 }
793
794 return false;
795}
796
797// Pass boilerplate
798// ================
799
800class AArch64PreLegalizerCombiner : public MachineFunctionPass {
801public:
802 static char ID;
803
804 AArch64PreLegalizerCombiner();
805
806 StringRef getPassName() const override {
807 return "AArch64PreLegalizerCombiner";
808 }
809
810 bool runOnMachineFunction(MachineFunction &MF) override;
811
812 void getAnalysisUsage(AnalysisUsage &AU) const override;
813
814private:
815 AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
816};
817} // end anonymous namespace
818
819void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
820 AU.addRequired<TargetPassConfig>();
821 AU.setPreservesCFG();
822 getSelectionDAGFallbackAnalysisUsage(AU);
823 AU.addRequired<GISelKnownBitsAnalysis>();
824 AU.addPreserved<GISelKnownBitsAnalysis>();
825 AU.addRequired<MachineDominatorTreeWrapperPass>();
826 AU.addPreserved<MachineDominatorTreeWrapperPass>();
827 AU.addRequired<GISelCSEAnalysisWrapperPass>();
828 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
829 MachineFunctionPass::getAnalysisUsage(AU);
830}
831
832AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
833 : MachineFunctionPass(ID) {
834 initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
835
836 if (!RuleConfig.parseCommandLineOption())
837 report_fatal_error("Invalid rule identifier");
838}
839
840bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
841 if (MF.getProperties().hasProperty(
842 MachineFunctionProperties::Property::FailedISel))
843 return false;
844 auto &TPC = getAnalysis<TargetPassConfig>();
845
846 // Enable CSE.
847 GISelCSEAnalysisWrapper &Wrapper =
848 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
849 auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());
850
851 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
852 const auto *LI = ST.getLegalizerInfo();
853
854 const Function &F = MF.getFunction();
855 bool EnableOpt =
856 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
857 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
858 MachineDominatorTree *MDT =
859 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
860 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
861 /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
862 F.hasMinSize());
863 // Disable fixed-point iteration to reduce compile-time
864 CInfo.MaxIterations = 1;
865 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
866 // This is the first Combiner, so the input IR might contain dead
867 // instructions.
868 CInfo.EnableFullDCE = true;
869 AArch64PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB, CSEInfo,
870 RuleConfig, ST, MDT, LI);
871 return Impl.combineMachineInstrs();
872}
873
874char AArch64PreLegalizerCombiner::ID = 0;
875INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
876 "Combine AArch64 machine instrs before legalization",
877 false, false)
878INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
879INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
880INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
881INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
882 "Combine AArch64 machine instrs before legalization", false,
883 false)
884
885namespace llvm {
886FunctionPass *createAArch64PreLegalizerCombiner() {
887 return new AArch64PreLegalizerCombiner();
888}
889} // end namespace llvm