LLVM 20.0.0git
AMDGPURegBankCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// after register banks are known.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-regbank-combiner"
35
36using namespace llvm;
37using namespace MIPatternMatch;
38
39namespace {
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenRegBankGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
44class AMDGPURegBankCombinerImpl : public Combiner {
45protected:
46 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
47 const GCNSubtarget &STI;
48 const RegisterBankInfo &RBI;
50 const SIInstrInfo &TII;
51 const CombinerHelper Helper;
52
53public:
54 AMDGPURegBankCombinerImpl(
55 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
56 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
57 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
58 const GCNSubtarget &STI, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI);
60
61 static const char *getName() { return "AMDGPURegBankCombinerImpl"; }
62
63 bool tryCombineAll(MachineInstr &I) const override;
64
65 bool isVgprRegBank(Register Reg) const;
66 Register getAsVgpr(Register Reg) const;
67
68 struct MinMaxMedOpc {
69 unsigned Min, Max, Med;
70 };
71
72 struct Med3MatchInfo {
73 unsigned Opc;
74 Register Val0, Val1, Val2;
75 };
76
77 MinMaxMedOpc getMinMaxPair(unsigned Opc) const;
78
79 template <class m_Cst, typename CstTy>
80 bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
81 Register &Val, CstTy &K0, CstTy &K1) const;
82
83 bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
84 bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
85 bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg) const;
86 bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg) const;
87 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
88 void applyClamp(MachineInstr &MI, Register &Reg) const;
89
90private:
91 SIModeRegisterDefaults getMode() const;
92 bool getIEEE() const;
93 bool getDX10Clamp() const;
94 bool isFminnumIeee(const MachineInstr &MI) const;
95 bool isFCst(MachineInstr *MI) const;
96 bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1) const;
97
98#define GET_GICOMBINER_CLASS_MEMBERS
99#define AMDGPUSubtarget GCNSubtarget
100#include "AMDGPUGenRegBankGICombiner.inc"
101#undef GET_GICOMBINER_CLASS_MEMBERS
102#undef AMDGPUSubtarget
103};
104
105#define GET_GICOMBINER_IMPL
106#define AMDGPUSubtarget GCNSubtarget
107#include "AMDGPUGenRegBankGICombiner.inc"
108#undef AMDGPUSubtarget
109#undef GET_GICOMBINER_IMPL
110
111AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
112 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
113 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
114 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
115 const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
116 : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
117 RBI(*STI.getRegBankInfo()), TRI(*STI.getRegisterInfo()),
118 TII(*STI.getInstrInfo()),
119 Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
121#include "AMDGPUGenRegBankGICombiner.inc"
123{
124}
125
126bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const {
127 return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
128}
129
130Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const {
131 if (isVgprRegBank(Reg))
132 return Reg;
133
134 // Search for existing copy of Reg to vgpr.
135 for (MachineInstr &Use : MRI.use_instructions(Reg)) {
136 Register Def = Use.getOperand(0).getReg();
137 if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
138 return Def;
139 }
140
141 // Copy Reg to vgpr.
142 Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
143 MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
144 return VgprReg;
145}
146
147AMDGPURegBankCombinerImpl::MinMaxMedOpc
148AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {
149 switch (Opc) {
150 default:
151 llvm_unreachable("Unsupported opcode");
152 case AMDGPU::G_SMAX:
153 case AMDGPU::G_SMIN:
154 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
155 case AMDGPU::G_UMAX:
156 case AMDGPU::G_UMIN:
157 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
158 case AMDGPU::G_FMAXNUM:
159 case AMDGPU::G_FMINNUM:
160 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
161 case AMDGPU::G_FMAXNUM_IEEE:
162 case AMDGPU::G_FMINNUM_IEEE:
163 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
164 AMDGPU::G_AMDGPU_FMED3};
165 }
166}
167
168template <class m_Cst, typename CstTy>
169bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
171 MinMaxMedOpc MMMOpc, Register &Val,
172 CstTy &K0, CstTy &K1) const {
173 // 4 operand commutes of: min(max(Val, K0), K1).
174 // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
175 // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
176 // 4 operand commutes of: max(min(Val, K1), K0).
177 // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
178 // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
179 return mi_match(
180 MI, MRI,
181 m_any_of(
183 MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
184 m_Cst(K1)),
186 MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
187 m_Cst(K0))));
188}
189
190bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
191 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
192 Register Dst = MI.getOperand(0).getReg();
193 if (!isVgprRegBank(Dst))
194 return false;
195
196 // med3 for i16 is only available on gfx9+, and not available for v2i16.
197 LLT Ty = MRI.getType(Dst);
198 if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
199 return false;
200
201 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
202 Register Val;
203 std::optional<ValueAndVReg> K0, K1;
204 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
205 if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
206 return false;
207
208 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
209 return false;
210 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
211 return false;
212
213 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
214 return true;
215}
216
217// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
218// ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
219// ieee = false : min/max(NaN, K) = K
220// clamp(NaN) = dx10_clamp ? 0.0 : NaN
221// Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
222// Other operand commutes (see matchMed) give same result since min and max are
223// commutative.
224
225// Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), KO<=K1
226// with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
227// Val = SNaN only for ieee = true
228// fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
229// min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
230// max(min(SNaN, K1), K0) = max(K1, K0) = K1
231// Val = NaN,ieee = false or Val = QNaN,ieee = true
232// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
233// min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
234// max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
235bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
236 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
237 Register Dst = MI.getOperand(0).getReg();
238 LLT Ty = MRI.getType(Dst);
239
240 // med3 for f16 is only available on gfx9+, and not available for v2f16.
241 if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
242 return false;
243
244 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
245
246 Register Val;
247 std::optional<FPValueAndVReg> K0, K1;
248 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
249 if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
250 return false;
251
252 if (K0->Value > K1->Value)
253 return false;
254
255 // For IEEE=false perform combine only when it's safe to assume that there are
256 // no NaN inputs. Most often MI is marked with nnan fast math flag.
257 // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
258 // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner
259 // nodes(max/min) have same behavior when one input is NaN and other isn't.
260 // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
261 // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
262 if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
263 // Don't fold single use constant that can't be inlined.
264 if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
265 (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
266 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
267 return true;
268 }
269 }
270
271 return false;
272}
273
274bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
275 Register &Reg) const {
276 // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
277 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
278 Register Val;
279 std::optional<FPValueAndVReg> K0, K1;
280 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
281 if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
282 return false;
283
284 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
285 return false;
286
287 // For IEEE=false perform combine only when it's safe to assume that there are
288 // no NaN inputs. Most often MI is marked with nnan fast math flag.
289 // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
290 // to 0.0 requires dx10_clamp = true.
291 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
292 isKnownNeverSNaN(Val, MRI)) ||
293 isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
294 Reg = Val;
295 return true;
296 }
297
298 return false;
299}
300
301// Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
302// Val = SNaN only for ieee = true. It is important which operand is NaN.
303// min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
304// min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
305// min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
306// Val = NaN,ieee = false or Val = QNaN,ieee = true
307// min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
308// min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
309// min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
310bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
311 Register &Reg) const {
312 // In llvm-ir, clamp is often represented as an intrinsic call to
313 // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
314 MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
315 MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
316 MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
317
318 if (isFCst(Src0) && !isFCst(Src1))
319 std::swap(Src0, Src1);
320 if (isFCst(Src1) && !isFCst(Src2))
321 std::swap(Src1, Src2);
322 if (isFCst(Src0) && !isFCst(Src1))
323 std::swap(Src0, Src1);
324 if (!isClampZeroToOne(Src1, Src2))
325 return false;
326
327 Register Val = Src0->getOperand(0).getReg();
328
329 auto isOp3Zero = [&]() {
330 MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
331 if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
332 return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
333 return false;
334 };
335 // For IEEE=false perform combine only when it's safe to assume that there are
336 // no NaN inputs. Most often MI is marked with nnan fast math flag.
337 // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
338 // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
339 if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
340 (getIEEE() && getDX10Clamp() &&
341 (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
342 Reg = Val;
343 return true;
344 }
345
346 return false;
347}
348
349void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI,
350 Register &Reg) const {
351 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
352 MI.getFlags());
353 MI.eraseFromParent();
354}
355
356void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
357 Med3MatchInfo &MatchInfo) const {
358 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
359 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
360 getAsVgpr(MatchInfo.Val2)},
361 MI.getFlags());
362 MI.eraseFromParent();
363}
364
365SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
366 return MF.getInfo<SIMachineFunctionInfo>()->getMode();
367}
368
369bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; }
370
371bool AMDGPURegBankCombinerImpl::getDX10Clamp() const {
372 return getMode().DX10Clamp;
373}
374
375bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const {
376 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
377}
378
379bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const {
380 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
381}
382
383bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
384 MachineInstr *K1) const {
385 if (isFCst(K0) && isFCst(K1)) {
386 const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
387 const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
388 return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) ||
389 (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0));
390 }
391 return false;
392}
393
394// Pass boilerplate
395// ================
396
397class AMDGPURegBankCombiner : public MachineFunctionPass {
398public:
399 static char ID;
400
401 AMDGPURegBankCombiner(bool IsOptNone = false);
402
403 StringRef getPassName() const override { return "AMDGPURegBankCombiner"; }
404
405 bool runOnMachineFunction(MachineFunction &MF) override;
406
407 void getAnalysisUsage(AnalysisUsage &AU) const override;
408
409private:
410 bool IsOptNone;
411 AMDGPURegBankCombinerImplRuleConfig RuleConfig;
412};
413} // end anonymous namespace
414
415void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
417 AU.setPreservesCFG();
421 if (!IsOptNone) {
424 }
426}
427
428AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
429 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
431
432 if (!RuleConfig.parseCommandLineOption())
433 report_fatal_error("Invalid rule identifier");
434}
435
436bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
437 if (MF.getProperties().hasProperty(
438 MachineFunctionProperties::Property::FailedISel))
439 return false;
440 auto *TPC = &getAnalysis<TargetPassConfig>();
441 const Function &F = MF.getFunction();
442 bool EnableOpt =
443 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
444
446 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
447
448 const auto *LI = ST.getLegalizerInfo();
450 IsOptNone ? nullptr
451 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
452
453 CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
454 LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
455 // Disable fixed-point iteration to reduce compile-time
456 CInfo.MaxIterations = 1;
457 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
458 // RegBankSelect seems not to leave dead instructions, so a full DCE pass is
459 // unnecessary.
460 CInfo.EnableFullDCE = false;
461 AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
462 RuleConfig, ST, MDT, LI);
463 return Impl.combineMachineInstrs();
464}
465
466char AMDGPURegBankCombiner::ID = 0;
467INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
468 "Combine AMDGPU machine instrs after regbankselect",
469 false, false)
472INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
473 "Combine AMDGPU machine instrs after regbankselect", false,
474 false)
475
476namespace llvm {
478 return new AMDGPURegBankCombiner(IsOptNone);
479}
480} // end namespace llvm
unsigned const MachineRegisterInfo * MRI
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
Combine AMDGPU machine instrs after regbankselect
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#define DEBUG_TYPE
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
const HexagonInstrInfo * TII
Hexagon Vector Combine
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static StringRef getName(Value *V)
static bool isClampZeroToOne(SDValue A, SDValue B)
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
Combiner implementation.
Definition: Combiner.h:34
virtual bool tryCombineAll(MachineInstr &I) const =0
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: Constants.cpp:1119
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
The CSE Analysis object.
Definition: CSEInfo.h:70
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Holds all the information related to register banks.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
operand_type_match m_Reg()
BinaryOpc_match< LHS, RHS, true > m_CommutativeBinOp(unsigned Opcode, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Or< Preds... > m_any_of(Preds &&... preds)
Reg
All possible values of the reg field in the ModR/M byte.
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:471
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:1153
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:338
auto instrs(const MachineBasicBlock &BB)
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860