30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
34#define DEBUG_TYPE "amdgpu-regbank-combiner"
37using namespace MIPatternMatch;
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenRegBankGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
44class AMDGPURegBankCombinerImpl :
public Combiner {
46 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
54 AMDGPURegBankCombinerImpl(
57 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
// Returns the fixed name string "AMDGPURegBankCombinerImpl".
61 static const char *
getName() {
return "AMDGPURegBankCombinerImpl"; }
65 bool isVgprRegBank(
Register Reg)
const;
69 unsigned Min, Max, Med;
72 struct Med3MatchInfo {
77 MinMaxMedOpc getMinMaxPair(
unsigned Opc)
const;
79 template <
class m_Cst,
typename CstTy>
81 Register &Val, CstTy &K0, CstTy &K1)
const;
83 bool matchIntMinMaxToMed3(
MachineInstr &
MI, Med3MatchInfo &MatchInfo)
const;
84 bool matchFPMinMaxToMed3(
MachineInstr &
MI, Med3MatchInfo &MatchInfo)
const;
93 bool getDX10Clamp()
const;
98#define GET_GICOMBINER_CLASS_MEMBERS
99#define AMDGPUSubtarget GCNSubtarget
100#include "AMDGPUGenRegBankGICombiner.inc"
101#undef GET_GICOMBINER_CLASS_MEMBERS
102#undef AMDGPUSubtarget
105#define GET_GICOMBINER_IMPL
106#define AMDGPUSubtarget GCNSubtarget
107#include "AMDGPUGenRegBankGICombiner.inc"
108#undef AMDGPUSubtarget
109#undef GET_GICOMBINER_IMPL
111AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
114 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
116 :
Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
117 RBI(*STI.getRegBankInfo()),
TRI(*STI.getRegisterInfo()),
118 TII(*STI.getInstrInfo()),
119 Helper(Observer,
B,
false, &KB, MDT, LI),
121#include
"AMDGPUGenRegBankGICombiner.inc"
126bool AMDGPURegBankCombinerImpl::isVgprRegBank(
Register Reg)
const {
127 return RBI.getRegBank(Reg,
MRI,
TRI)->getID() == AMDGPU::VGPRRegBankID;
131 if (isVgprRegBank(Reg))
137 if (
Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
142 Register VgprReg =
B.buildCopy(
MRI.getType(Reg), Reg).getReg(0);
143 MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
147AMDGPURegBankCombinerImpl::MinMaxMedOpc
148AMDGPURegBankCombinerImpl::getMinMaxPair(
unsigned Opc)
const {
154 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
157 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
158 case AMDGPU::G_FMAXNUM:
159 case AMDGPU::G_FMINNUM:
160 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
161 case AMDGPU::G_FMAXNUM_IEEE:
162 case AMDGPU::G_FMINNUM_IEEE:
163 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
164 AMDGPU::G_AMDGPU_FMED3};
168template <
class m_Cst,
typename CstTy>
172 CstTy &K0, CstTy &K1)
const {
190bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
193 if (!isVgprRegBank(Dst))
197 LLT Ty =
MRI.getType(Dst);
201 MinMaxMedOpc OpcodeTriple = getMinMaxPair(
MI.getOpcode());
203 std::optional<ValueAndVReg> K0, K1;
205 if (!matchMed<GCstAndRegMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
208 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
210 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
213 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
235bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
238 LLT Ty =
MRI.getType(Dst);
244 auto OpcodeTriple = getMinMaxPair(
MI.getOpcode());
247 std::optional<FPValueAndVReg> K0, K1;
249 if (!matchMed<GFCstAndRegMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
252 if (K0->Value > K1->Value)
264 if ((!
MRI.hasOneNonDBGUse(K0->VReg) ||
TII.isInlineConstant(K0->Value)) &&
265 (!
MRI.hasOneNonDBGUse(K1->VReg) ||
TII.isInlineConstant(K1->Value))) {
266 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
274bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(
MachineInstr &
MI,
277 auto OpcodeTriple = getMinMaxPair(
MI.getOpcode());
279 std::optional<FPValueAndVReg> K0, K1;
281 if (!matchMed<GFCstOrSplatGFCstMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
284 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
291 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(
MI) &&
310bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(
MachineInstr &
MI,
318 if (isFCst(Src0) && !isFCst(Src1))
320 if (isFCst(Src1) && !isFCst(Src2))
322 if (isFCst(Src0) && !isFCst(Src1))
329 auto isOp3Zero = [&]() {
331 if (Op3->
getOpcode() == TargetOpcode::G_FCONSTANT)
340 (getIEEE() && getDX10Clamp() &&
351 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {
MI.getOperand(0)}, {
Reg},
353 MI.eraseFromParent();
357 Med3MatchInfo &MatchInfo)
const {
358 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
359 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
360 getAsVgpr(MatchInfo.Val2)},
362 MI.eraseFromParent();
// Returns the IEEE field of the mode object obtained from getMode().
369bool AMDGPURegBankCombinerImpl::getIEEE()
const {
return getMode().IEEE; }
371bool AMDGPURegBankCombinerImpl::getDX10Clamp()
const {
372 return getMode().DX10Clamp;
375bool AMDGPURegBankCombinerImpl::isFminnumIeee(
const MachineInstr &
MI)
const {
376 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
380 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
383bool AMDGPURegBankCombinerImpl::isClampZeroToOne(
MachineInstr *K0,
385 if (isFCst(K0) && isFCst(K1)) {
401 AMDGPURegBankCombiner(
bool IsOptNone =
false);
411 AMDGPURegBankCombinerImplRuleConfig RuleConfig;
415void AMDGPURegBankCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
428AMDGPURegBankCombiner::AMDGPURegBankCombiner(
bool IsOptNone)
432 if (!RuleConfig.parseCommandLineOption())
438 MachineFunctionProperties::Property::FailedISel))
440 auto *TPC = &getAnalysis<TargetPassConfig>();
446 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
448 const auto *LI =
ST.getLegalizerInfo();
451 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
454 LI, EnableOpt,
F.hasOptSize(),
F.hasMinSize());
456 CInfo.MaxIterations = 1;
457 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
460 CInfo.EnableFullDCE =
false;
461 AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *KB,
nullptr,
462 RuleConfig, ST, MDT, LI);
463 return Impl.combineMachineInstrs();
466char AMDGPURegBankCombiner::ID = 0;
468 "Combine AMDGPU machine instrs after regbankselect",
478 return new AMDGPURegBankCombiner(IsOptNone);
unsigned const MachineRegisterInfo * MRI
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
Combine AMDGPU machine instrs after regbankselect
#define GET_GICOMBINER_CONSTRUCTOR_INITS
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This contains common combine transformations that may be used in a combine pass, or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static StringRef getName(Value *V)
static bool isClampZeroToOne(SDValue A, SDValue B)
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
virtual bool tryCombineAll(MachineInstr &I) const =0
ConstantFP - Floating Point Values [float, double].
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
FunctionPass class - This class is used to implement most global optimizations.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Holds all the information related to register banks.
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
StringRef - Represent a constant reference to a string, i.e.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
operand_type_match m_Reg()
BinaryOpc_match< LHS, RHS, true > m_CommutativeBinOp(unsigned Opcode, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Or< Preds... > m_any_of(Preds &&... preds)
Reg
All possible values of the reg field in the ModR/M byte.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
auto instrs(const MachineBasicBlock &BB)
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.