//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

class AMDGPUPostLegalizerCombinerHelper {
protected:
  MachineIRBuilder &B;
  MachineFunction &MF;
  MachineRegisterInfo &MRI;
  const GCNSubtarget &Subtarget;
  const SIInstrInfo &TII;
  AMDGPUCombinerHelper &Helper;

public:
  AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B,
                                    AMDGPUCombinerHelper &Helper)
      : B(B), MF(B.getMF()), MRI(*B.getMRI()),
        Subtarget(MF.getSubtarget<GCNSubtarget>()),
        TII(*Subtarget.getInstrInfo()), Helper(Helper) {}

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info);

  bool matchUCharToFloat(MachineInstr &MI);
  void applyUCharToFloat(MachineInstr &MI);

  bool matchRcpSqrtToRsq(MachineInstr &MI,
                         std::function<void(MachineIRBuilder &)> &MatchInfo);

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo);

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg);

  // Combine unsigned buffer load and signed extension instructions to
  // generate signed buffer load instructions.
  bool matchCombineSignExtendInReg(MachineInstr &MI, MachineInstr *&MatchInfo);
  void applyCombineSignExtendInReg(MachineInstr &MI, MachineInstr *&MatchInfo);
};

bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // TODO: Handle case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
  B.setInstrAndDebugLoc(MI);
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}
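
// Illustrative example (a sketch, not taken from the upstream file or its
// tests): a G_SELECT whose condition is a one-use G_FCMP of the same two
// values, e.g.
//   %cmp:_(s1) = G_FCMP floatpred(olt), %a:_(s32), %b:_(s32)
//   %sel:_(s32) = G_SELECT %cmp(s1), %a, %b
// is rewritten by the match/apply pair above into
//   %sel:_(s32) = G_AMDGPU_FMIN_LEGACY %a, %b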

bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}
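
// Illustrative example (a sketch, not from upstream tests): when known-bits
// analysis proves the upper 24 bits of the source are zero, an int-to-float
// conversion such as
//   %f:_(s32) = G_UITOFP %x:_(s32)
// becomes
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %x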

bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {

  auto getRcpSrc = [=](const MachineInstr &MI) {
    MachineInstr *ResMI = nullptr;
    if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
        MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
      ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());

    return ResMI;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) {
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  return false;
}
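
// Illustrative example (a sketch, not from upstream tests): either nesting
// order collapses to a single rsq intrinsic, e.g.
//   %sqrt:_(s32) = G_FSQRT %x
//   %res:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %sqrt(s32)
// becomes
//   %res:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s32)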

bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
292
293 const LLT S32 = LLT::scalar(32);
294 Register CvtSrc = MatchInfo.CvtVal;
295 LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
296 if (SrcTy != S32) {
297 assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
298 CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
299 }
300
301 assert(MI.getOpcode() != NewOpc);
302 B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
303 MI.eraseFromParent();
304}
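
// Illustrative example (a sketch, not from upstream tests): the shift is
// folded into the byte index of the conversion, e.g.
//   %c8:_(s32) = G_CONSTANT i32 8
//   %srl:_(s32) = G_LSHR %x:_(s32), %c8
//   %cvt:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %srl
// becomes
//   %cvt:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %x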

bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}
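
// Illustrative example (an assumed sketch): if %v already comes from an
// instruction whose result is canonical, e.g. a G_FMUL, then
//   %c:_(s32) = G_FCANONICALIZE %v
// is redundant, and the generated apply replaces uses of %c with %v.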

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.

// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerHelper::matchCombineSignExtendInReg(
    MachineInstr &MI, MachineInstr *&SubwordBufferLoad) {
  Register Op0Reg = MI.getOperand(1).getReg();
  SubwordBufferLoad = MRI.getVRegDef(Op0Reg);

  if (!MRI.hasOneNonDBGUse(Op0Reg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE ||
         SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerHelper::applyCombineSignExtendInReg(
    MachineInstr &MI, MachineInstr *&SubwordBufferLoad) {
  // Modify the opcode and the destination of buffer_load_{u8, u16}:
  // Replace the opcode.
  unsigned Opc =
      SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE
          ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE
          : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
  SubwordBufferLoad->setDesc(TII.get(Opc));
  // Update the destination register of SubwordBufferLoad with the destination
  // register of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}
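
// Illustrative example (a sketch, with made-up operands): a sign extension of
// a one-use unsigned subword buffer load, e.g.
//   %ld:_(s32) = G_AMDGPU_BUFFER_LOAD_UBYTE %rsrc, %vindex, %voffset, ...
//   %sext:_(s32) = G_SEXT_INREG %ld, 8
// is rewritten in place into a single signed load
//   %sext:_(s32) = G_AMDGPU_BUFFER_LOAD_SBYTE %rsrc, %vindex, %voffset, ...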

class AMDGPUPostLegalizerCombinerHelperState {
protected:
  AMDGPUCombinerHelper &Helper;
  AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;

  // Note: pointer is necessary because Target Predicates use
  // "Subtarget->"
  const GCNSubtarget *Subtarget;

public:
  AMDGPUPostLegalizerCombinerHelperState(
      AMDGPUCombinerHelper &Helper,
      AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper,
      const GCNSubtarget &Subtarget)
      : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper),
        Subtarget(&Subtarget) {}
};

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;
  const GCNSubtarget &Subtarget;

public:
  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

  AMDGPUPostLegalizerCombinerInfo(const GCNSubtarget &Subtarget, bool EnableOpt,
                                  bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT), Subtarget(Subtarget) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT,
                              LInfo);
  AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
  AMDGPUGenPostLegalizerCombinerHelper Generated(
      GeneratedRuleCfg, Helper, PostLegalizerHelper, Subtarget);

  if (Generated.tryCombineAll(Observer, MI, B))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}
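
// Illustrative example (an assumed sketch of tryCombineShiftToUnmerge, with
// made-up operands): a 64-bit left shift by a constant of at least 32 only
// produces high bits, so
//   %shl:_(s64) = G_SHL %x:_(s64), %c40
// can be split into 32-bit pieces roughly as
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
//   %newhi:_(s32) = G_SHL %lo, %c8    ; 40 - 32
//   %zero:_(s32) = G_CONSTANT i32 0
//   %shl:_(s64) = G_MERGE_VALUES %zero(s32), %newhi(s32)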

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
  bool IsOptNone;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPUPostLegalizerCombinerInfo PCInfo(ST, EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm