Go to the documentation of this file.
27 #include "llvm/IR/IntrinsicsAMDGPU.h"
29 #define DEBUG_TYPE "amdgpu-regbank-combiner"
32 using namespace MIPatternMatch;
47 :
B(
B), MF(
B.getMF()),
MRI(*
B.getMRI()),
49 RBI(*Subtarget.getRegBankInfo()),
TRI(*Subtarget.getRegisterInfo()),
50 TII(*Subtarget.getInstrInfo()), Helper(Helper){};
56 unsigned Min, Max, Med;
66 template <
class m_Cst,
typename CstTy>
68 Register &Val, CstTy &K0, CstTy &K1);
78 AMDGPU::SIModeRegisterDefaults getMode();
87 return RBI.getRegBank(
Reg,
MRI,
TRI)->getID() == AMDGPU::VGPRRegBankID;
91 if (isVgprRegBank(
Reg))
97 if (
Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(
Def))
103 MRI.
setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
114 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
117 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
118 case AMDGPU::G_FMAXNUM:
119 case AMDGPU::G_FMINNUM:
120 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
121 case AMDGPU::G_FMAXNUM_IEEE:
122 case AMDGPU::G_FMINNUM_IEEE:
123 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
124 AMDGPU::G_AMDGPU_FMED3};
128 template <
class m_Cst,
typename CstTy>
132 CstTy &K0, CstTy &K1) {
153 if (!isVgprRegBank(Dst))
158 if ((Ty !=
LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
164 std::optional<ValueAndVReg> K0, K1;
166 if (!matchMed<GCstAndRegMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
169 if (OpcodeTriple.
Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
171 if (OpcodeTriple.
Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
174 MatchInfo = {OpcodeTriple.
Med, Val, K0->VReg, K1->VReg};
202 if ((Ty !=
LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
206 auto OpcodeTriple = getMinMaxPair(
MI.getOpcode());
209 std::optional<FPValueAndVReg> K0, K1;
211 if (!matchMed<GFCstAndRegMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
214 if (K0->Value > K1->Value)
228 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
239 auto OpcodeTriple = getMinMaxPair(
MI.getOpcode());
241 std::optional<FPValueAndVReg> K0, K1;
243 if (!matchMed<GFCstOrSplatGFCstMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
246 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
253 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(
MI) &&
274 if (
MI.getIntrinsicID() != Intrinsic::amdgcn_fmed3)
283 if (isFCst(Src0) && !isFCst(Src1))
285 if (isFCst(Src1) && !isFCst(Src2))
287 if (isFCst(Src0) && !isFCst(Src1))
294 auto isOp3Zero = [&]() {
296 if (Op3->
getOpcode() == TargetOpcode::G_FCONSTANT)
305 (getIEEE() && getDX10Clamp() &&
315 B.setInstrAndDebugLoc(
MI);
316 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {
MI.getOperand(0)}, {
Reg},
318 MI.eraseFromParent();
323 B.setInstrAndDebugLoc(
MI);
324 B.buildInstr(MatchInfo.
Opc, {MI.getOperand(0)},
325 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
326 getAsVgpr(MatchInfo.Val2)},
328 MI.eraseFromParent();
331 AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
// Convenience accessor: returns the IEEE member of the
// AMDGPU::SIModeRegisterDefaults value produced by getMode().
// NOTE(review): presumably this reflects the function's IEEE floating-point
// mode setting -- confirm against the SIModeRegisterDefaults definition.
335 bool AMDGPURegBankCombinerHelper::getIEEE() {
return getMode().IEEE; }
// Convenience accessor: returns the DX10Clamp member of the
// AMDGPU::SIModeRegisterDefaults value produced by getMode().
// NOTE(review): presumably this reflects the function's DX10 clamp mode
// setting -- confirm against the SIModeRegisterDefaults definition.
337 bool AMDGPURegBankCombinerHelper::getDX10Clamp() {
return getMode().DX10Clamp; }
339 bool AMDGPURegBankCombinerHelper::isFminnumIeee(
const MachineInstr &
MI) {
340 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
344 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
347 bool AMDGPURegBankCombinerHelper::isClampZeroToOne(
MachineInstr *K0,
349 if (isFCst(K0) && isFCst(K1)) {
366 : Helper(Helper), RegBankHelper(RegBankHelper) {}
369 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
370 #include "AMDGPUGenRegBankGICombiner.inc"
371 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
374 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
375 #include "AMDGPUGenRegBankGICombiner.inc"
376 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
378 class AMDGPURegBankCombinerInfo final :
public CombinerInfo {
383 AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
385 AMDGPURegBankCombinerInfo(
bool EnableOpt,
bool OptSize,
bool MinSize,
389 LI, EnableOpt, OptSize, MinSize),
391 if (!GeneratedRuleCfg.parseCommandLineOption())
404 AMDGPUGenRegBankCombinerHelper
Generated(GeneratedRuleCfg, Helper,
413 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
414 #include "AMDGPUGenRegBankGICombiner.inc"
415 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
424 AMDGPURegBankCombiner(
bool IsOptNone =
false);
427 return "AMDGPURegBankCombiner";
438 void AMDGPURegBankCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
451 AMDGPURegBankCombiner::AMDGPURegBankCombiner(
bool IsOptNone)
456 bool AMDGPURegBankCombiner::runOnMachineFunction(
MachineFunction &MF) {
458 MachineFunctionProperties::Property::FailedISel))
460 auto *TPC = &getAnalysis<TargetPassConfig>();
469 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
471 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
472 AMDGPURegBankCombinerInfo PCInfo(EnableOpt,
F.hasOptSize(),
473 F.hasMinSize(), LI, KB, MDT);
475 return C.combineMachineInstrs(MF,
nullptr);
480 "Combine AMDGPU machine instrs after regbankselect",
490 return new AMDGPURegBankCombiner(IsOptNone);
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
void applyClamp(MachineInstr &MI, Register &Reg)
MachineRegisterInfo & MRI
bool hasProperty(Property P) const
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
This is an optimization pass for GlobalISel generic memory operations.
operand_type_match m_Reg()
MinMaxMedOpc getMinMaxPair(unsigned Opc)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isVgprRegBank(Register Reg)
Combine AMDGPU machine instrs after regbankselect
Reg
All possible values of the reg field in the ModR/M byte.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg)
AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc, Register &Val, CstTy &K0, CstTy &K1)
auto instrs(const MachineBasicBlock &BB)
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
unsigned const TargetRegisterInfo * TRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
This class provides the information for the target register banks.
AMDGPURegBankCombinerHelper & RegBankHelper
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
(vector float) vec_cmpeq(*A, *B) C
const MachineOperand & getOperand(unsigned i) const
Represent the analysis usage information of a pass.
const MachineFunctionProperties & getProperties() const
Get the function properties.
static bool isClampZeroToOne(SDValue A, SDValue B)
const HexagonInstrInfo * TII
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMDGPURegBankCombinerHelperState(CombinerHelper &Helper, AMDGPURegBankCombinerHelper &RegBankHelper)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
ConstantFP - Floating Point Values [float, double].
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Holds all the information related to register banks.
Helper class to build MachineInstr.
Representation of each machine instruction.
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
Register getAsVgpr(Register Reg)
const RegisterBankInfo & RBI
BinaryOpc_match< LHS, RHS, true > m_CommutativeBinOp(unsigned Opcode, const LHS &L, const RHS &R)
const GCNSubtarget & Subtarget
const ConstantFP * getFPImm() const
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Register getReg() const
getReg - Returns the register number.
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
void setPreservesCFG()
This function should be called by the pass iff it does not:
StringRef - Represent a constant reference to a string, i.e.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
Or< Preds... > m_any_of(Preds &&... preds)
bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg)
Abstract class that contains various methods for clients to notify about changes.
bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
Function & getFunction()
Return the LLVM function that this machine code represents.
constexpr std::nullopt_t None
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
FunctionPass class - This class is used to implement most global optimizations.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
AnalysisUsage & addRequired()
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE, "Combine AMDGPU machine instrs after regbankselect", false, false) INITIALIZE_PASS_END(AMDGPURegBankCombiner
const TargetRegisterInfo & TRI
A Use represents the edge between a Value definition and its users.