LLVM 22.0.0git
AArch64PreLegalizerCombiner.cpp
1//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
31
32#define GET_GICOMBINER_DEPS
33#include "AArch64GenPreLegalizeGICombiner.inc"
34#undef GET_GICOMBINER_DEPS
35
36#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
37
38using namespace llvm;
39using namespace MIPatternMatch;
40
41namespace {
42
43#define GET_GICOMBINER_TYPES
44#include "AArch64GenPreLegalizeGICombiner.inc"
45#undef GET_GICOMBINER_TYPES
46
47/// Return true if a G_FCONSTANT instruction is known to be better-represented
48/// as a G_CONSTANT.
49bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
50 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
51 Register DstReg = MI.getOperand(0).getReg();
52 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
53 if (DstSize != 32 && DstSize != 64)
54 return false;
55
56 // When the value is only ever stored, it doesn't matter what register bank
57 // it's on. Since not all floating point constants can be materialized with an
58 // fmov, it makes more sense to just use a GPR.
59 return all_of(MRI.use_nodbg_instructions(DstReg),
60 [](const MachineInstr &Use) { return Use.mayStore(); });
61}
62
63/// Change a G_FCONSTANT into a G_CONSTANT.
64void applyFConstantToConstant(MachineInstr &MI) {
65 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
66 MachineIRBuilder MIB(MI);
67 const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
68 MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
69 MI.eraseFromParent();
70}
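// Editorial illustration (not from the original source): a rough generic-MIR
// view of this combine, with hypothetical registers and a single store user.
//
//   %f:_(s32) = G_FCONSTANT float 1.0
//   G_STORE %f(s32), %p(p0)
//
// becomes
//
//   %f:_(s32) = G_CONSTANT i32 1065353216   ; 0x3F800000, same bit pattern
//   G_STORE %f(s32), %p(p0)
//
// so the constant can be materialized in a GPR rather than an FPR.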
71
72/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
73/// are sign bits. In this case, we can transform the G_ICMP to directly compare
74/// the wide value with zero.
75bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
76 GISelValueTracking *VT, Register &MatchInfo) {
77 assert(MI.getOpcode() == TargetOpcode::G_ICMP && VT);
78
79 auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
80 if (!ICmpInst::isEquality(Pred))
81 return false;
82
83 Register LHS = MI.getOperand(2).getReg();
84 LLT LHSTy = MRI.getType(LHS);
85 if (!LHSTy.isScalar())
86 return false;
87
88 Register RHS = MI.getOperand(3).getReg();
89 Register WideReg;
90
91 if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
92 !mi_match(RHS, MRI, m_SpecificICst(0)))
93 return false;
94
95 LLT WideTy = MRI.getType(WideReg);
96 if (VT->computeNumSignBits(WideReg) <=
97 WideTy.getSizeInBits() - LHSTy.getSizeInBits())
98 return false;
99
100 MatchInfo = WideReg;
101 return true;
102}
103
104void applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
105 MachineIRBuilder &Builder,
106 GISelChangeObserver &Observer, Register &WideReg) {
107 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
108
109 LLT WideTy = MRI.getType(WideReg);
110 // We're going to directly use the wide register as the LHS, and an
111 // equivalently sized zero as the RHS.
112 Builder.setInstrAndDebugLoc(MI);
113 auto WideZero = Builder.buildConstant(WideTy, 0);
114 Observer.changingInstr(MI);
115 MI.getOperand(2).setReg(WideReg);
116 MI.getOperand(3).setReg(WideZero.getReg(0));
117 Observer.changedInstr(MI);
118}
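// Editorial illustration (hypothetical registers): when the top 32 bits of %w
// are all sign bits,
//
//   %t:_(s32) = G_TRUNC %w(s64)
//   %z:_(s32) = G_CONSTANT i32 0
//   %c:_(s1)  = G_ICMP intpred(eq), %t(s32), %z
//
// is rewritten in place to compare the wide value directly:
//
//   %z64:_(s64) = G_CONSTANT i64 0
//   %c:_(s1)    = G_ICMP intpred(eq), %w(s64), %z64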
119
120/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
121///
122/// e.g.
123///
124/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
125bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
126 std::pair<uint64_t, uint64_t> &MatchInfo) {
127 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
128 MachineFunction &MF = *MI.getMF();
129 auto &GlobalOp = MI.getOperand(1);
130 auto *GV = GlobalOp.getGlobal();
131 if (GV->isThreadLocal())
132 return false;
133
134 // Don't allow anything that could represent offsets etc.
135 if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
136 GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
137 return false;
138
139 // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
140 //
141 // %g = G_GLOBAL_VALUE @x
142 // %ptr1 = G_PTR_ADD %g, cst1
143 // %ptr2 = G_PTR_ADD %g, cst2
144 // ...
145 // %ptrN = G_PTR_ADD %g, cstN
146 //
147 // Identify the *smallest* constant. We want to be able to form this:
148 //
149 // %offset_g = G_GLOBAL_VALUE @x + min_cst
150 // %g = G_PTR_ADD %offset_g, -min_cst
151 // %ptr1 = G_PTR_ADD %g, cst1
152 // ...
153 Register Dst = MI.getOperand(0).getReg();
154 uint64_t MinOffset = -1ull;
155 for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
156 if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
157 return false;
158 auto Cst = getIConstantVRegValWithLookThrough(
159 UseInstr.getOperand(2).getReg(), MRI);
160 if (!Cst)
161 return false;
162 MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
163 }
164
165 // Require that the new offset is larger than the existing one to avoid
166 // infinite loops.
167 uint64_t CurrOffset = GlobalOp.getOffset();
168 uint64_t NewOffset = MinOffset + CurrOffset;
169 if (NewOffset <= CurrOffset)
170 return false;
171
172 // Check whether folding this offset is legal. It must not go out of bounds of
173 // the referenced object to avoid violating the code model, and must be
174 // smaller than 2^20 because this is the largest offset expressible in all
175 // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
176 // stores an immediate signed 21 bit offset.)
177 //
178 // This check also prevents us from folding negative offsets, which will end
179 // up being treated in the same way as large positive ones. They could also
180 // cause code model violations, and aren't really common enough to matter.
181 if (NewOffset >= (1 << 20))
182 return false;
183
184 Type *T = GV->getValueType();
185 if (!T->isSized() ||
186 NewOffset > GV->getDataLayout().getTypeAllocSize(T))
187 return false;
188 MatchInfo = std::make_pair(NewOffset, MinOffset);
189 return true;
190}
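// Editorial note: a small worked example of the checks above, with hypothetical
// constants. Given uses G_PTR_ADD %g, 16; G_PTR_ADD %g, 4096; G_PTR_ADD %g, 8,
// MinOffset is 8. With a current offset of 0 on @x, NewOffset is 8, which passes
// both the 2^20 bound and the allocation-size check for any @x of at least 8
// bytes, so MatchInfo becomes (8, 8).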
191
192void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
193 MachineIRBuilder &B, GISelChangeObserver &Observer,
194 std::pair<uint64_t, uint64_t> &MatchInfo) {
195 // Change:
196 //
197 // %g = G_GLOBAL_VALUE @x
198 // %ptr1 = G_PTR_ADD %g, cst1
199 // %ptr2 = G_PTR_ADD %g, cst2
200 // ...
201 // %ptrN = G_PTR_ADD %g, cstN
202 //
203 // To:
204 //
205 // %offset_g = G_GLOBAL_VALUE @x + min_cst
206 // %g = G_PTR_ADD %offset_g, -min_cst
207 // %ptr1 = G_PTR_ADD %g, cst1
208 // ...
209 // %ptrN = G_PTR_ADD %g, cstN
210 //
211 // Then, the original G_PTR_ADDs should be folded later on so that they look
212 // like this:
213 //
214 // %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
215 uint64_t Offset, MinOffset;
216 std::tie(Offset, MinOffset) = MatchInfo;
217 B.setInstrAndDebugLoc(*std::next(MI.getIterator()));
218 Observer.changingInstr(MI);
219 auto &GlobalOp = MI.getOperand(1);
220 auto *GV = GlobalOp.getGlobal();
221 GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
222 Register Dst = MI.getOperand(0).getReg();
223 Register NewGVDst = MRI.cloneVirtualRegister(Dst);
224 MI.getOperand(0).setReg(NewGVDst);
225 Observer.changedInstr(MI);
226 B.buildPtrAdd(
227 Dst, NewGVDst,
228 B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
229}
230
231// Combines vecreduce_add(mul(ext(x), ext(y))) -> vecreduce_add([us]dot(x, y))
232// Or vecreduce_add(ext(mul(ext(x), ext(y)))) -> vecreduce_add([us]dot(x, y))
233// Or vecreduce_add(ext(x)) -> vecreduce_add([us]dot(x, 1))
234// Similar to performVecReduceAddCombine in SelectionDAG
235bool matchExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
236 const AArch64Subtarget &STI,
237 std::tuple<Register, Register, bool> &MatchInfo) {
238 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
239 "Expected a G_VECREDUCE_ADD instruction");
240 assert(STI.hasDotProd() && "Target should have Dot Product feature");
241
242 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
243 Register DstReg = MI.getOperand(0).getReg();
244 Register MidReg = I1->getOperand(0).getReg();
245 LLT DstTy = MRI.getType(DstReg);
246 LLT MidTy = MRI.getType(MidReg);
247 if (DstTy.getScalarSizeInBits() != 32 || MidTy.getScalarSizeInBits() != 32)
248 return false;
249
250 // Detect mul(ext, ext) where both extends use the same opcode. If I1Opc is
251 // already G_ZEXT or G_SEXT, the extends must match it; on success I1Opc is
252 // set to the extend opcode that was found.
253 auto tryMatchingMulOfExt = [&MRI](MachineInstr *MI, Register &Out1,
254 Register &Out2, unsigned &I1Opc) {
255 // If the result of the multiply has more than one use, then there is no
256 // point in creating a dot instruction
257 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
258 return false;
259
260 MachineInstr *ExtMI1 =
261 getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI);
262 MachineInstr *ExtMI2 =
263 getDefIgnoringCopies(MI->getOperand(2).getReg(), MRI);
264 LLT Ext1DstTy = MRI.getType(ExtMI1->getOperand(0).getReg());
265 LLT Ext2DstTy = MRI.getType(ExtMI2->getOperand(0).getReg());
266
267 if (ExtMI1->getOpcode() != ExtMI2->getOpcode() || Ext1DstTy != Ext2DstTy)
268 return false;
269 if ((I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) &&
270 I1Opc != ExtMI1->getOpcode())
271 return false;
272 Out1 = ExtMI1->getOperand(1).getReg();
273 Out2 = ExtMI2->getOperand(1).getReg();
274 I1Opc = ExtMI1->getOpcode();
275 return true;
276 };
277
278 LLT SrcTy;
279 unsigned I1Opc = I1->getOpcode();
280 if (I1Opc == TargetOpcode::G_MUL) {
281 Register Out1, Out2;
282 if (!tryMatchingMulOfExt(I1, Out1, Out2, I1Opc))
283 return false;
284 SrcTy = MRI.getType(Out1);
285 std::get<0>(MatchInfo) = Out1;
286 std::get<1>(MatchInfo) = Out2;
287 } else if (I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) {
288 Register I1Op = I1->getOperand(1).getReg();
289 MachineInstr *M = getDefIgnoringCopies(I1Op, MRI);
290 Register Out1, Out2;
291 if (M->getOpcode() == TargetOpcode::G_MUL &&
292 tryMatchingMulOfExt(M, Out1, Out2, I1Opc)) {
293 SrcTy = MRI.getType(Out1);
294 std::get<0>(MatchInfo) = Out1;
295 std::get<1>(MatchInfo) = Out2;
296 } else {
297 SrcTy = MRI.getType(I1Op);
298 std::get<0>(MatchInfo) = I1Op;
299 std::get<1>(MatchInfo) = 0;
300 }
301 } else {
302 return false;
303 }
304
305 if (I1Opc == TargetOpcode::G_ZEXT)
306 std::get<2>(MatchInfo) = 0;
307 else if (I1Opc == TargetOpcode::G_SEXT)
308 std::get<2>(MatchInfo) = 1;
309 else
310 return false;
311
312 if (SrcTy.getScalarSizeInBits() != 8 || SrcTy.getNumElements() % 8 != 0)
313 return false;
314
315 return true;
316}
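// Editorial illustration (hypothetical registers), assuming the subtarget has
// the dot product feature:
//
//   %xe:_(<16 x s32>) = G_ZEXT %x(<16 x s8>)
//   %ye:_(<16 x s32>) = G_ZEXT %y(<16 x s8>)
//   %m:_(<16 x s32>)  = G_MUL %xe, %ye
//   %r:_(s32)         = G_VECREDUCE_ADD %m
//
// matches with MatchInfo = (%x, %y, /*IsSExt=*/false), so the apply step below
// can emit a G_UDOT-based reduction instead of widening everything to 32 bits.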
317
318void applyExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
319 MachineIRBuilder &Builder,
320 GISelChangeObserver &Observer,
321 const AArch64Subtarget &STI,
322 std::tuple<Register, Register, bool> &MatchInfo) {
323 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
324 "Expected a G_VECREDUCE_ADD instruction");
325 assert(STI.hasDotProd() && "Target should have Dot Product feature");
326
327 // Initialise the variables
328 unsigned DotOpcode =
329 std::get<2>(MatchInfo) ? AArch64::G_SDOT : AArch64::G_UDOT;
330 Register Ext1SrcReg = std::get<0>(MatchInfo);
331
332 // If there is only one source register, use a vector of 1s as the second
333 // source register so the dot product reduces to a plain sum (x * 1 == x)
334 Register Ext2SrcReg;
335 if (std::get<1>(MatchInfo) == 0)
336 Ext2SrcReg = Builder.buildConstant(MRI.getType(Ext1SrcReg), 1)
337 ->getOperand(0)
338 .getReg();
339 else
340 Ext2SrcReg = std::get<1>(MatchInfo);
341
342 // Find out how many DOT instructions are needed
343 LLT SrcTy = MRI.getType(Ext1SrcReg);
344 LLT MidTy;
345 unsigned NumOfDotMI;
346 if (SrcTy.getNumElements() % 16 == 0) {
347 NumOfDotMI = SrcTy.getNumElements() / 16;
348 MidTy = LLT::fixed_vector(4, 32);
349 } else if (SrcTy.getNumElements() % 8 == 0) {
350 NumOfDotMI = SrcTy.getNumElements() / 8;
351 MidTy = LLT::fixed_vector(2, 32);
352 } else {
353 llvm_unreachable("Source type number of elements is not multiple of 8");
354 }
355
356 // Handle case where one DOT instruction is needed
357 if (NumOfDotMI == 1) {
358 auto Zeroes = Builder.buildConstant(MidTy, 0)->getOperand(0).getReg();
359 auto Dot = Builder.buildInstr(DotOpcode, {MidTy},
360 {Zeroes, Ext1SrcReg, Ext2SrcReg});
361 Builder.buildVecReduceAdd(MI.getOperand(0), Dot->getOperand(0));
362 } else {
363 // If not, pad the trailing v8i8 part with 0s up to v16i8
364 SmallVector<Register, 4> Ext1UnmergeReg;
365 SmallVector<Register, 4> Ext2UnmergeReg;
366 if (SrcTy.getNumElements() % 16 != 0) {
367 SmallVector<Register> Leftover1;
368 SmallVector<Register> Leftover2;
369
370 // Split the elements into v16i8 and v8i8
371 LLT MainTy = LLT::fixed_vector(16, 8);
372 LLT LeftoverTy1, LeftoverTy2;
373 if ((!extractParts(Ext1SrcReg, MRI.getType(Ext1SrcReg), MainTy,
374 LeftoverTy1, Ext1UnmergeReg, Leftover1, Builder,
375 MRI)) ||
376 (!extractParts(Ext2SrcReg, MRI.getType(Ext2SrcReg), MainTy,
377 LeftoverTy2, Ext2UnmergeReg, Leftover2, Builder,
378 MRI))) {
379 llvm_unreachable("Unable to split this vector properly");
380 }
381
382 // Pad the leftover v8i8 vector with a v8i8 register of 0s
383 Register v8Zeroes = Builder.buildConstant(LLT::fixed_vector(8, 8), 0)
384 ->getOperand(0)
385 .getReg();
386
387 Ext1UnmergeReg.push_back(
388 Builder
389 .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
390 {Leftover1[0], v8Zeroes})
391 .getReg(0));
392 Ext2UnmergeReg.push_back(
393 Builder
394 .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
395 {Leftover2[0], v8Zeroes})
396 .getReg(0));
397
398 } else {
399 // Unmerge the source vectors to v16i8
400 unsigned SrcNumElts = SrcTy.getNumElements();
401 extractParts(Ext1SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
402 Ext1UnmergeReg, Builder, MRI);
403 extractParts(Ext2SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
404 Ext2UnmergeReg, Builder, MRI);
405 }
406
407 // Build the UDOT instructions
408 SmallVector<Register, 2> DotReg;
409 unsigned NumElements = 0;
410 for (unsigned i = 0; i < Ext1UnmergeReg.size(); i++) {
411 LLT ZeroesLLT;
412 // Check if this part has 16 or 8 elements; set Zeroes to the corresponding size
413 if (MRI.getType(Ext1UnmergeReg[i]).getNumElements() == 16) {
414 ZeroesLLT = LLT::fixed_vector(4, 32);
415 NumElements += 4;
416 } else {
417 ZeroesLLT = LLT::fixed_vector(2, 32);
418 NumElements += 2;
419 }
420 auto Zeroes = Builder.buildConstant(ZeroesLLT, 0)->getOperand(0).getReg();
421 DotReg.push_back(
422 Builder
423 .buildInstr(DotOpcode, {MRI.getType(Zeroes)},
424 {Zeroes, Ext1UnmergeReg[i], Ext2UnmergeReg[i]})
425 .getReg(0));
426 }
427
428 // Merge the output
429 auto ConcatMI =
430 Builder.buildConcatVectors(LLT::fixed_vector(NumElements, 32), DotReg);
431
432 // Put it through a vector reduction
433 Builder.buildVecReduceAdd(MI.getOperand(0).getReg(),
434 ConcatMI->getOperand(0).getReg());
435 }
436
437 // Erase the dead instructions
438 MI.eraseFromParent();
439}
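// Editorial note: for inputs wider than one dot instruction can cover, the code
// above splits the sources. For hypothetical <32 x s8> operands it emits two
// G_UDOT/G_SDOT instructions on <16 x s8> halves (each accumulating into a
// <4 x s32> of zeroes), concatenates the results into <8 x s32>, and feeds that
// into a final G_VECREDUCE_ADD.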
440
441// Matches {U/S}ADDV(ext(x)) => {U/S}ADDLV(x)
442// Ensure that the type coming from the extend instruction is the right size
443bool matchExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
444 std::pair<Register, bool> &MatchInfo) {
445 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
446 "Expected G_VECREDUCE_ADD Opcode");
447
448 // Check if the last instruction is an extend
449 MachineInstr *ExtMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
450 auto ExtOpc = ExtMI->getOpcode();
451
452 if (ExtOpc == TargetOpcode::G_ZEXT)
453 std::get<1>(MatchInfo) = 0;
454 else if (ExtOpc == TargetOpcode::G_SEXT)
455 std::get<1>(MatchInfo) = 1;
456 else
457 return false;
458
459 // Check if the source register is a valid type
460 Register ExtSrcReg = ExtMI->getOperand(1).getReg();
461 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
462 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
463 if ((DstTy.getScalarSizeInBits() == 16 &&
464 ExtSrcTy.getNumElements() % 8 == 0 && ExtSrcTy.getNumElements() < 256) ||
465 (DstTy.getScalarSizeInBits() == 32 &&
466 ExtSrcTy.getNumElements() % 4 == 0) ||
467 (DstTy.getScalarSizeInBits() == 64 &&
468 ExtSrcTy.getNumElements() % 4 == 0)) {
469 std::get<0>(MatchInfo) = ExtSrcReg;
470 return true;
471 }
472 return false;
473}
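// Editorial illustration (hypothetical registers):
//
//   %xe:_(<8 x s16>) = G_ZEXT %x(<8 x s8>)
//   %r:_(s16)        = G_VECREDUCE_ADD %xe
//
// matches with MatchInfo = (%x, /*IsSExt=*/false), and the apply step below
// replaces the extend-plus-reduce sequence with an AArch64 G_UADDLV of %x.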
474
475void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
476 MachineIRBuilder &B, GISelChangeObserver &Observer,
477 std::pair<Register, bool> &MatchInfo) {
478 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
479 "Expected G_VECREDUCE_ADD Opcode");
480
481 unsigned Opc = std::get<1>(MatchInfo) ? AArch64::G_SADDLV : AArch64::G_UADDLV;
482 Register SrcReg = std::get<0>(MatchInfo);
483 Register DstReg = MI.getOperand(0).getReg();
484 LLT SrcTy = MRI.getType(SrcReg);
485 LLT DstTy = MRI.getType(DstReg);
486
487 // If SrcTy has more elements than expected, split them into multiple
488 // instructions and sum the results
489 LLT MainTy;
490 SmallVector<Register, 1> WorkingRegisters;
491 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
492 unsigned SrcNumElem = SrcTy.getNumElements();
493 if ((SrcScalSize == 8 && SrcNumElem > 16) ||
494 (SrcScalSize == 16 && SrcNumElem > 8) ||
495 (SrcScalSize == 32 && SrcNumElem > 4)) {
496
497 LLT LeftoverTy;
498 SmallVector<Register, 4> LeftoverRegs;
499 if (SrcScalSize == 8)
500 MainTy = LLT::fixed_vector(16, 8);
501 else if (SrcScalSize == 16)
502 MainTy = LLT::fixed_vector(8, 16);
503 else if (SrcScalSize == 32)
504 MainTy = LLT::fixed_vector(4, 32);
505 else
506 llvm_unreachable("Source's Scalar Size not supported");
507
508 // Extract the parts, put each extracted source through U/SADDLV, and collect
509 // the resulting values in WorkingRegisters
510 extractParts(SrcReg, SrcTy, MainTy, LeftoverTy, WorkingRegisters,
511 LeftoverRegs, B, MRI);
512 llvm::append_range(WorkingRegisters, LeftoverRegs);
513 } else {
514 WorkingRegisters.push_back(SrcReg);
515 MainTy = SrcTy;
516 }
517
518 unsigned MidScalarSize = MainTy.getScalarSizeInBits() * 2;
519 LLT MidScalarLLT = LLT::scalar(MidScalarSize);
520 Register zeroReg = B.buildConstant(LLT::scalar(64), 0).getReg(0);
521 for (unsigned I = 0; I < WorkingRegisters.size(); I++) {
522 // If the number of elements is too small to build an instruction, extend
523 // its size before applying addlv
524 LLT WorkingRegTy = MRI.getType(WorkingRegisters[I]);
525 if ((WorkingRegTy.getScalarSizeInBits() == 8) &&
526 (WorkingRegTy.getNumElements() == 4)) {
527 WorkingRegisters[I] =
528 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
529 : TargetOpcode::G_ZEXT,
530 {LLT::fixed_vector(4, 16)}, {WorkingRegisters[I]})
531 .getReg(0);
532 }
533
534 // Generate the {U/S}ADDLV instruction, whose scalar output is always double
535 // the source's scalar size
536 LLT addlvTy = MidScalarSize <= 32 ? LLT::fixed_vector(4, 32)
537 : LLT::fixed_vector(2, 64);
538 Register addlvReg =
539 B.buildInstr(Opc, {addlvTy}, {WorkingRegisters[I]}).getReg(0);
540
541 // The output from {U/S}ADDLV gets placed in the lowest lane of a v4i32 or
542 // v2i64 register:
543 // i16 and i32 results use v4i32 registers,
544 // i64 results use v2i64 registers.
545 // Therefore we have to extract/truncate the value to the right type
546 if (MidScalarSize == 32 || MidScalarSize == 64) {
547 WorkingRegisters[I] = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
548 {MidScalarLLT}, {addlvReg, zeroReg})
549 .getReg(0);
550 } else {
551 Register extractReg = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
552 {LLT::scalar(32)}, {addlvReg, zeroReg})
553 .getReg(0);
554 WorkingRegisters[I] =
555 B.buildTrunc({MidScalarLLT}, {extractReg}).getReg(0);
556 }
557 }
558
559 Register outReg;
560 if (WorkingRegisters.size() > 1) {
561 outReg = B.buildAdd(MidScalarLLT, WorkingRegisters[0], WorkingRegisters[1])
562 .getReg(0);
563 for (unsigned I = 2; I < WorkingRegisters.size(); I++) {
564 outReg = B.buildAdd(MidScalarLLT, outReg, WorkingRegisters[I]).getReg(0);
565 }
566 } else {
567 outReg = WorkingRegisters[0];
568 }
569
570 if (DstTy.getScalarSizeInBits() > MidScalarSize) {
571 // DstTy's scalar size is more than double the source's scalar size, so
572 // extend the accumulated value to DstTy
573 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
574 : TargetOpcode::G_ZEXT,
575 {DstReg}, {outReg});
576 } else {
577 B.buildCopy(DstReg, outReg);
578 }
579
580 MI.eraseFromParent();
581}
582
583// Pushes ADD/SUB/MUL through extend instructions to decrease the number of
584// extend instructions at the end by allowing selection of {s|u}addl sooner, e.g.
585// i32 add(i32 ext i8, i32 ext i8) => i32 ext(i16 add(i16 ext i8, i16 ext i8))
586bool matchPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
587 Register DstReg, Register SrcReg1, Register SrcReg2) {
588 assert((MI.getOpcode() == TargetOpcode::G_ADD ||
589 MI.getOpcode() == TargetOpcode::G_SUB ||
590 MI.getOpcode() == TargetOpcode::G_MUL) &&
591 "Expected a G_ADD, G_SUB or G_MUL instruction\n");
592
593 // Deal with vector types only
594 LLT DstTy = MRI.getType(DstReg);
595 if (!DstTy.isVector())
596 return false;
597
598 // Only handle exts whose result is more than 2x the source size (and whose sources have the same type)
599 Register ExtDstReg = MI.getOperand(1).getReg();
600 LLT Ext1SrcTy = MRI.getType(SrcReg1);
601 LLT Ext2SrcTy = MRI.getType(SrcReg2);
602 unsigned ExtDstScal = MRI.getType(ExtDstReg).getScalarSizeInBits();
603 unsigned Ext1SrcScal = Ext1SrcTy.getScalarSizeInBits();
604 if (((Ext1SrcScal == 8 && ExtDstScal == 32) ||
605 ((Ext1SrcScal == 8 || Ext1SrcScal == 16) && ExtDstScal == 64)) &&
606 Ext1SrcTy == Ext2SrcTy)
607 return true;
608
609 return false;
610}
611
612void applyPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
613 MachineIRBuilder &B, bool isSExt, Register DstReg,
614 Register SrcReg1, Register SrcReg2) {
615 LLT SrcTy = MRI.getType(SrcReg1);
616 LLT MidTy = SrcTy.changeElementSize(SrcTy.getScalarSizeInBits() * 2);
617 unsigned Opc = isSExt ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
618 Register Ext1Reg = B.buildInstr(Opc, {MidTy}, {SrcReg1}).getReg(0);
619 Register Ext2Reg = B.buildInstr(Opc, {MidTy}, {SrcReg2}).getReg(0);
620 Register AddReg =
621 B.buildInstr(MI.getOpcode(), {MidTy}, {Ext1Reg, Ext2Reg}).getReg(0);
622
623 // G_SUB has to sign-extend the result.
624 // G_ADD needs sext from sext (and can use sext or zext from zext), while G_MUL
625 // needs to keep the original extension kind, so the original opcode is used for both.
626 if (MI.getOpcode() == TargetOpcode::G_ADD ||
627 MI.getOpcode() == TargetOpcode::G_MUL)
628 B.buildInstr(Opc, {DstReg}, {AddReg});
629 else
630 B.buildSExt(DstReg, AddReg);
631
632 MI.eraseFromParent();
633}
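// Editorial illustration (hypothetical registers) of the push-through for a
// vector add:
//
//   %xe:_(<8 x s32>) = G_ZEXT %x(<8 x s8>)
//   %ye:_(<8 x s32>) = G_ZEXT %y(<8 x s8>)
//   %d:_(<8 x s32>)  = G_ADD %xe, %ye
//
// becomes
//
//   %xe16:_(<8 x s16>) = G_ZEXT %x(<8 x s8>)
//   %ye16:_(<8 x s16>) = G_ZEXT %y(<8 x s8>)
//   %a:_(<8 x s16>)    = G_ADD %xe16, %ye16
//   %d:_(<8 x s32>)    = G_ZEXT %a
//
// letting later selection form uaddl/saddl on the narrower type.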
634
635bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
636 const CombinerHelper &Helper,
637 GISelChangeObserver &Observer) {
638 // Try to simplify a G_UADDO with 8 or 16 bit operands into a wide G_ADD and a
639 // TBNZ if the result is only used in the no-overflow case. It is restricted to cases
640 // where we know that the high-bits of the operands are 0. If there's an
641 // overflow, then the 9th or 17th bit must be set, which can be checked
642 // using TBNZ.
643 //
644 // Change (for UADDOs on 8 and 16 bits):
645 //
646 // %z0 = G_ASSERT_ZEXT _
647 // %op0 = G_TRUNC %z0
648 // %z1 = G_ASSERT_ZEXT _
649 // %op1 = G_TRUNC %z1
650 // %val, %cond = G_UADDO %op0, %op1
651 // G_BRCOND %cond, %error.bb
652 //
653 // error.bb:
654 // (no successors and no uses of %val)
655 //
656 // To:
657 //
658 // %z0 = G_ASSERT_ZEXT _
659 // %z1 = G_ASSERT_ZEXT _
660 // %add = G_ADD %z0, %z1
661 // %val = G_TRUNC %add
662 // %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
663 // %cond = G_ICMP NE, %bit, 0
664 // G_BRCOND %cond, %error.bb
665
666 auto &MRI = *B.getMRI();
667
668 MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
669 MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
670 Register Op0Wide;
671 Register Op1Wide;
672 if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
673 !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
674 return false;
675 LLT WideTy0 = MRI.getType(Op0Wide);
676 LLT WideTy1 = MRI.getType(Op1Wide);
677 Register ResVal = MI.getOperand(0).getReg();
678 LLT OpTy = MRI.getType(ResVal);
679 MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
680 MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);
681
682 unsigned OpTySize = OpTy.getScalarSizeInBits();
683 // First check that the G_TRUNC feeding the G_UADDO are no-ops, because the
684 // inputs have been zero-extended.
685 if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
686 Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
687 OpTySize != Op0WideDef->getOperand(2).getImm() ||
688 OpTySize != Op1WideDef->getOperand(2).getImm())
689 return false;
690
691 // Only scalar UADDO with either 8 or 16 bit operands are handled.
692 if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
693 OpTySize >= WideTy0.getScalarSizeInBits() ||
694 (OpTySize != 8 && OpTySize != 16))
695 return false;
696
697 // The overflow-status result must be used by a branch only.
698 Register ResStatus = MI.getOperand(1).getReg();
699 if (!MRI.hasOneNonDBGUse(ResStatus))
700 return false;
701 MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
702 if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
703 return false;
704
705 // Make sure the computed result is only used in the no-overflow blocks.
706 MachineBasicBlock *CurrentMBB = MI.getParent();
707 MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
708 if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
709 return false;
710 if (any_of(MRI.use_nodbg_instructions(ResVal),
711 [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
712 return &MI != &I &&
713 (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
714 }))
715 return false;
716
717 // Remove the G_UADDO.
718 B.setInstrAndDebugLoc(*MI.getNextNode());
719 MI.eraseFromParent();
720
721 // Emit wide add.
722 Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
723 B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});
724
725 // Emit check of the 9th or 17th bit and update users (the branch). This will
726 // later be folded to TBNZ.
727 Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
728 B.buildAnd(
729 CondBit, AddDst,
730 B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
731 B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
732 B.buildConstant(LLT::scalar(32), 0));
733
734 // Update G_ZEXT users of the result value. Because all uses are in the
735 // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
736 B.buildZExtOrTrunc(ResVal, AddDst);
737 for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
738 Register WideReg;
739 if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
740 auto OldR = U.getParent()->getOperand(0).getReg();
741 Observer.erasingInstr(*U.getParent());
742 U.getParent()->eraseFromParent();
743 Helper.replaceRegWith(MRI, OldR, AddDst);
744 }
745 }
746
747 return true;
748}
749
750class AArch64PreLegalizerCombinerImpl : public Combiner {
751protected:
752 const CombinerHelper Helper;
753 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig;
754 const AArch64Subtarget &STI;
755
756public:
757 AArch64PreLegalizerCombinerImpl(
758 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
759 GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
760 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
761 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
762 const LegalizerInfo *LI);
763
764 static const char *getName() { return "AArch64PreLegalizerCombiner"; }
765
766 bool tryCombineAll(MachineInstr &I) const override;
767
768 bool tryCombineAllImpl(MachineInstr &I) const;
769
770private:
771#define GET_GICOMBINER_CLASS_MEMBERS
772#include "AArch64GenPreLegalizeGICombiner.inc"
773#undef GET_GICOMBINER_CLASS_MEMBERS
774};
775
776#define GET_GICOMBINER_IMPL
777#include "AArch64GenPreLegalizeGICombiner.inc"
778#undef GET_GICOMBINER_IMPL
779
780AArch64PreLegalizerCombinerImpl::AArch64PreLegalizerCombinerImpl(
781 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
782 GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
783 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
784 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
785 const LegalizerInfo *LI)
786 : Combiner(MF, CInfo, TPC, &VT, CSEInfo),
787 Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI),
788 RuleConfig(RuleConfig), STI(STI),
790#include "AArch64GenPreLegalizeGICombiner.inc"
792{
793}
794
795bool AArch64PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
796 if (tryCombineAllImpl(MI))
797 return true;
798
799 unsigned Opc = MI.getOpcode();
800 switch (Opc) {
801 case TargetOpcode::G_SHUFFLE_VECTOR:
802 return Helper.tryCombineShuffleVector(MI);
803 case TargetOpcode::G_UADDO:
804 return tryToSimplifyUADDO(MI, B, Helper, Observer);
805 case TargetOpcode::G_MEMCPY_INLINE:
806 return Helper.tryEmitMemcpyInline(MI);
807 case TargetOpcode::G_MEMCPY:
808 case TargetOpcode::G_MEMMOVE:
809 case TargetOpcode::G_MEMSET: {
810 // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
811 // heuristics decide.
812 unsigned MaxLen = CInfo.EnableOpt ? 0 : 32;
813 // Try to inline memcpy type calls if optimizations are enabled.
814 if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
815 return true;
816 if (Opc == TargetOpcode::G_MEMSET)
817 return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, CInfo.EnableMinSize);
818 return false;
819 }
820 }
821
822 return false;
823}
824
825// Pass boilerplate
826// ================
827
828class AArch64PreLegalizerCombiner : public MachineFunctionPass {
829public:
830 static char ID;
831
832 AArch64PreLegalizerCombiner();
833
834 StringRef getPassName() const override {
835 return "AArch64PreLegalizerCombiner";
836 }
837
838 bool runOnMachineFunction(MachineFunction &MF) override;
839
840 void getAnalysisUsage(AnalysisUsage &AU) const override;
841
842private:
843 AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
844};
845} // end anonymous namespace
846
847void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
848 AU.addRequired<TargetPassConfig>();
849 AU.setPreservesCFG();
850 getSelectionDAGFallbackAnalysisUsage(AU);
851 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
852 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
853 AU.addRequired<MachineDominatorTreeWrapperPass>();
854 AU.addPreserved<MachineDominatorTreeWrapperPass>();
855 AU.addRequired<GISelCSEAnalysisWrapperPass>();
856 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
857 MachineFunctionPass::getAnalysisUsage(AU);
858}
859
860AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
861 : MachineFunctionPass(ID) {
862 if (!RuleConfig.parseCommandLineOption())
863 report_fatal_error("Invalid rule identifier");
864}
865
866bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
867 if (MF.getProperties().hasFailedISel())
868 return false;
869 auto &TPC = getAnalysis<TargetPassConfig>();
870
871 // Enable CSE.
872 GISelCSEAnalysisWrapper &Wrapper =
873 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
874 auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());
875
876 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
877 const auto *LI = ST.getLegalizerInfo();
878
879 const Function &F = MF.getFunction();
880 bool EnableOpt =
881 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
882 GISelValueTracking *VT =
883 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
884 MachineDominatorTree *MDT =
885 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
886 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
887 /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
888 F.hasMinSize());
889 // Disable fixed-point iteration to reduce compile-time
890 CInfo.MaxIterations = 1;
891 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
892 // This is the first Combiner, so the input IR might contain dead
893 // instructions.
894 CInfo.EnableFullDCE = true;
895 AArch64PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *VT, CSEInfo,
896 RuleConfig, ST, MDT, LI);
897 return Impl.combineMachineInstrs();
898}
899
900char AArch64PreLegalizerCombiner::ID = 0;
901INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
902 "Combine AArch64 machine instrs before legalization",
903 false, false)
904INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
905INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)
906INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
907INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
908 "Combine AArch64 machine instrs before legalization", false,
909 false)
910
911namespace llvm {
912FunctionPass *createAArch64PreLegalizerCombiner() {
913 return new AArch64PreLegalizerCombiner();
914}
915} // end namespace llvm
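Editorial note: the pass is constructed through createAArch64PreLegalizerCombiner() and is scheduled by the AArch64 target before the GlobalISel legalizer runs. Below is a minimal sketch of what that registration typically looks like in the target's pass configuration; the hook name addPreLegalizeMachineIR and the class AArch64PassConfig follow the usual GlobalISel conventions but are assumptions here, not taken from this file.

// Sketch only: assumes the conventional GlobalISel hook in AArch64TargetMachine.cpp.
bool AArch64PassConfig::addPreLegalizeMachineIR() {
  // Run the pre-legalizer combiner on generic MI; a reduced O0 variant of the
  // combiner is commonly used when optimizations are disabled.
  addPass(createAArch64PreLegalizerCombiner());
  return false;
}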