LLVM 17.0.0git
AMDGPURegBankCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// after register banks are known.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
27#include "llvm/IR/IntrinsicsAMDGPU.h"
29#define DEBUG_TYPE "amdgpu-regbank-combiner"
30
31using namespace llvm;
32using namespace MIPatternMatch;
33
35protected:
44
45public:
47 : B(B), MF(B.getMF()), MRI(*B.getMRI()),
48 Subtarget(MF.getSubtarget<GCNSubtarget>()),
49 RBI(*Subtarget.getRegBankInfo()), TRI(*Subtarget.getRegisterInfo()),
50 TII(*Subtarget.getInstrInfo()), Helper(Helper){};
51
52 bool isVgprRegBank(Register Reg);
54
55 struct MinMaxMedOpc {
56 unsigned Min, Max, Med;
57 };
58
60 unsigned Opc;
62 };
63
64 MinMaxMedOpc getMinMaxPair(unsigned Opc);
65
66 template <class m_Cst, typename CstTy>
68 Register &Val, CstTy &K0, CstTy &K1);
69
74 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
76
77private:
78 SIModeRegisterDefaults getMode();
79 bool getIEEE();
80 bool getDX10Clamp();
81 bool isFminnumIeee(const MachineInstr &MI);
82 bool isFCst(MachineInstr *MI);
83 bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1);
84};
85
87 return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
88}
89
91 if (isVgprRegBank(Reg))
92 return Reg;
93
94 // Search for existing copy of Reg to vgpr.
95 for (MachineInstr &Use : MRI.use_instructions(Reg)) {
96 Register Def = Use.getOperand(0).getReg();
97 if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
98 return Def;
99 }
100
101 // Copy Reg to vgpr.
102 Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
103 MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
104 return VgprReg;
105}
106
109 switch (Opc) {
110 default:
111 llvm_unreachable("Unsupported opcode");
112 case AMDGPU::G_SMAX:
113 case AMDGPU::G_SMIN:
114 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
115 case AMDGPU::G_UMAX:
116 case AMDGPU::G_UMIN:
117 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
118 case AMDGPU::G_FMAXNUM:
119 case AMDGPU::G_FMINNUM:
120 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
121 case AMDGPU::G_FMAXNUM_IEEE:
122 case AMDGPU::G_FMINNUM_IEEE:
123 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
124 AMDGPU::G_AMDGPU_FMED3};
125 }
126}
127
128template <class m_Cst, typename CstTy>
131 MinMaxMedOpc MMMOpc, Register &Val,
132 CstTy &K0, CstTy &K1) {
133 // 4 operand commutes of: min(max(Val, K0), K1).
134 // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
135 // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
136 // 4 operand commutes of: max(min(Val, K1), K0).
137 // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
138 // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
139 return mi_match(
140 MI, MRI,
141 m_any_of(
143 MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
144 m_Cst(K1)),
146 MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
147 m_Cst(K0))));
148}
149
151 MachineInstr &MI, Med3MatchInfo &MatchInfo) {
152 Register Dst = MI.getOperand(0).getReg();
153 if (!isVgprRegBank(Dst))
154 return false;
155
156 // med3 for i16 is only available on gfx9+, and not available for v2i16.
157 LLT Ty = MRI.getType(Dst);
158 if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
159 Ty != LLT::scalar(32))
160 return false;
161
162 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
163 Register Val;
164 std::optional<ValueAndVReg> K0, K1;
165 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
166 if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
167 return false;
168
169 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
170 return false;
171 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
172 return false;
173
174 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
175 return true;
176}
177
178// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
179// ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
180// ieee = false : min/max(NaN, K) = K
181// clamp(NaN) = dx10_clamp ? 0.0 : NaN
182// Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
183// Other operand commutes (see matchMed) give same result since min and max are
184// commutative.
185
186// Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0 <= K1
187// with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
188// Val = SNaN only for ieee = true
189// fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
190// min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
191// max(min(SNaN, K1), K0) = max(K1, K0) = K1
192// Val = NaN,ieee = false or Val = QNaN,ieee = true
193// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
194// min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
195// max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
197 MachineInstr &MI, Med3MatchInfo &MatchInfo) {
198 Register Dst = MI.getOperand(0).getReg();
199 LLT Ty = MRI.getType(Dst);
200
201 // med3 for f16 is only available on gfx9+, and not available for v2f16.
202 if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
203 Ty != LLT::scalar(32))
204 return false;
205
206 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
207
208 Register Val;
209 std::optional<FPValueAndVReg> K0, K1;
210 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
211 if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
212 return false;
213
214 if (K0->Value > K1->Value)
215 return false;
216
217 // For IEEE=false perform combine only when it's safe to assume that there are
218 // no NaN inputs. Most often MI is marked with nnan fast math flag.
219 // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
220 // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner
221 // nodes(max/min) have same behavior when one input is NaN and other isn't.
222 // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
223 // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
224 if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
225 // Don't fold single use constant that can't be inlined.
226 if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
227 (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
228 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
229 return true;
230 }
231 }
232
233 return false;
234}
235
237 Register &Reg) {
238 // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
239 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
240 Register Val;
241 std::optional<FPValueAndVReg> K0, K1;
242 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
243 if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
244 return false;
245
246 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
247 return false;
248
249 // For IEEE=false perform combine only when it's safe to assume that there are
250 // no NaN inputs. Most often MI is marked with nnan fast math flag.
251 // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
252 // to 0.0 requires dx10_clamp = true.
253 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
254 isKnownNeverSNaN(Val, MRI)) ||
255 isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
256 Reg = Val;
257 return true;
258 }
259
260 return false;
261}
262
263// Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
264// Val = SNaN only for ieee = true. It is important which operand is NaN.
265// min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
266// min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
267// min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
268// Val = NaN,ieee = false or Val = QNaN,ieee = true
269// min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
270// min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
271// min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
273 Register &Reg) {
274 // In llvm-ir, clamp is often represented as an intrinsic call to
275 // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
276 MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
277 MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
278 MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
279
280 if (isFCst(Src0) && !isFCst(Src1))
281 std::swap(Src0, Src1);
282 if (isFCst(Src1) && !isFCst(Src2))
283 std::swap(Src1, Src2);
284 if (isFCst(Src0) && !isFCst(Src1))
285 std::swap(Src0, Src1);
286 if (!isClampZeroToOne(Src1, Src2))
287 return false;
288
289 Register Val = Src0->getOperand(0).getReg();
290
291 auto isOp3Zero = [&]() {
292 MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
293 if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
294 return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
295 return false;
296 };
297 // For IEEE=false perform combine only when it's safe to assume that there are
298 // no NaN inputs. Most often MI is marked with nnan fast math flag.
299 // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
300 // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
301 if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
302 (getIEEE() && getDX10Clamp() &&
303 (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
304 Reg = Val;
305 return true;
306 }
307
308 return false;
309}
310
313 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
314 MI.getFlags());
315 MI.eraseFromParent();
316}
317
319 Med3MatchInfo &MatchInfo) {
321 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
322 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
323 getAsVgpr(MatchInfo.Val2)},
324 MI.getFlags());
325 MI.eraseFromParent();
326}
327
328SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
329 return MF.getInfo<SIMachineFunctionInfo>()->getMode();
330}
331
332bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; }
333
334bool AMDGPURegBankCombinerHelper::getDX10Clamp() { return getMode().DX10Clamp; }
335
336bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) {
337 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
338}
339
340bool AMDGPURegBankCombinerHelper::isFCst(MachineInstr *MI) {
341 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
342}
343
344bool AMDGPURegBankCombinerHelper::isClampZeroToOne(MachineInstr *K0,
345 MachineInstr *K1) {
346 if (isFCst(K0) && isFCst(K1)) {
347 const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
348 const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
349 return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) ||
350 (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0));
351 }
352 return false;
353}
354
356protected:
359
360public:
364};
365
366#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
367#include "AMDGPUGenRegBankGICombiner.inc"
368#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
369
370namespace {
371#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
372#include "AMDGPUGenRegBankGICombiner.inc"
373#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
374
375class AMDGPURegBankCombinerInfo final : public CombinerInfo {
376 GISelKnownBits *KB;
378
379public:
380 AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
381
382 AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
383 const AMDGPULegalizerInfo *LI,
385 : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
386 /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
387 KB(KB), MDT(MDT) {
388 if (!GeneratedRuleCfg.parseCommandLineOption())
389 report_fatal_error("Invalid rule identifier");
390 }
391
393 MachineIRBuilder &B) const override;
394};
395
396bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
398 MachineIRBuilder &B) const {
399 CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB, MDT);
400 AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
401 AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
402 RegBankHelper);
403
404 if (Generated.tryCombineAll(Observer, MI, B))
405 return true;
406
407 return false;
408}
409
410#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
411#include "AMDGPUGenRegBankGICombiner.inc"
412#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
413
414// Pass boilerplate
415// ================
416
417class AMDGPURegBankCombiner : public MachineFunctionPass {
418public:
419 static char ID;
420
421 AMDGPURegBankCombiner(bool IsOptNone = false);
422
423 StringRef getPassName() const override {
424 return "AMDGPURegBankCombiner";
425 }
426
427 bool runOnMachineFunction(MachineFunction &MF) override;
428
429 void getAnalysisUsage(AnalysisUsage &AU) const override;
430private:
431 bool IsOptNone;
432};
433} // end anonymous namespace
434
435void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
437 AU.setPreservesCFG();
441 if (!IsOptNone) {
444 }
446}
447
448AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
449 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
451}
452
453bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
454 if (MF.getProperties().hasProperty(
455 MachineFunctionProperties::Property::FailedISel))
456 return false;
457 auto *TPC = &getAnalysis<TargetPassConfig>();
458 const Function &F = MF.getFunction();
459 bool EnableOpt =
460 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
461
463 const AMDGPULegalizerInfo *LI
464 = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
465
466 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
468 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
469 AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
470 F.hasMinSize(), LI, KB, MDT);
471 Combiner C(PCInfo, TPC);
472 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
473}
474
475char AMDGPURegBankCombiner::ID = 0;
476INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
477 "Combine AMDGPU machine instrs after regbankselect",
478 false, false)
481INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
482 "Combine AMDGPU machine instrs after regbankselect", false,
483 false)
484
485namespace llvm {
487 return new AMDGPURegBankCombiner(IsOptNone);
488}
489} // end namespace llvm
unsigned const MachineRegisterInfo * MRI
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
Combine AMDGPU machine instrs after regbankselect
#define DEBUG_TYPE
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This contains common combine transformations that may be used in a combine pass,or by the target else...
Interface for Targets to specify which operations are combined how and when.
This contains common code to drive combines.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Hexagon Vector Combine
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
Contains matchers for matching SSA Machine Instructions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
Target-Independent Code Generator Pass Configuration Options pass.
AMDGPURegBankCombinerHelper & RegBankHelper
AMDGPURegBankCombinerHelperState(CombinerHelper &Helper, AMDGPURegBankCombinerHelper &RegBankHelper)
const RegisterBankInfo & RBI
Register getAsVgpr(Register Reg)
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc, Register &Val, CstTy &K0, CstTy &K1)
bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
void applyClamp(MachineInstr &MI, Register &Reg)
const TargetRegisterInfo & TRI
AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
MinMaxMedOpc getMinMaxPair(unsigned Opc)
bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg)
bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg)
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
This class provides the information for the target register banks.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const =0
Attempt to combine instructions using MI as the root.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:260
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: Constants.cpp:1043
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool hasMed3_16() const
Definition: GCNSubtarget.h:385
Abstract class that contains various methods for clients to notify about changes.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:523
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:533
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Holds all the information related to register banks.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:46
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isInlineConstant(const APInt &Imm) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
operand_type_match m_Reg()
BinaryOpc_match< LHS, RHS, true > m_CommutativeBinOp(unsigned Opcode, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Or< Preds... > m_any_of(Preds &&... preds)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:458
bool isKnownNeverNaN(const Value *V, const DataLayout &DL, const TargetLibraryInfo *TLI, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:892
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:301
auto instrs(const MachineBasicBlock &BB)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...