28#include "llvm/IR/IntrinsicsAMDGPU.h"
31#define GET_GICOMBINER_DEPS
32#include "AMDGPUGenPreLegalizeGICombiner.inc"
33#undef GET_GICOMBINER_DEPS
35#define DEBUG_TYPE "amdgpu-regbank-combiner"
38using namespace MIPatternMatch;
41#define GET_GICOMBINER_TYPES
42#include "AMDGPUGenRegBankGICombiner.inc"
43#undef GET_GICOMBINER_TYPES
45class AMDGPURegBankCombinerImpl :
public Combiner {
47 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
56 AMDGPURegBankCombinerImpl(
59 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
// Returns the fixed, human-readable identifier of this combiner
// implementation; the value is always the literal below.
63 static const char *
getName() {
return "AMDGPURegBankCombinerImpl"; }
67 bool isVgprRegBank(
Register Reg)
const;
71 unsigned Min, Max, Med;
74 struct Med3MatchInfo {
79 MinMaxMedOpc getMinMaxPair(
unsigned Opc)
const;
81 template <
class m_Cst,
typename CstTy>
83 Register &Val, CstTy &K0, CstTy &K1)
const;
85 bool matchIntMinMaxToMed3(
MachineInstr &
MI, Med3MatchInfo &MatchInfo)
const;
86 bool matchFPMinMaxToMed3(
MachineInstr &
MI, Med3MatchInfo &MatchInfo)
const;
95 bool getDX10Clamp()
const;
100#define GET_GICOMBINER_CLASS_MEMBERS
101#define AMDGPUSubtarget GCNSubtarget
102#include "AMDGPUGenRegBankGICombiner.inc"
103#undef GET_GICOMBINER_CLASS_MEMBERS
104#undef AMDGPUSubtarget
107#define GET_GICOMBINER_IMPL
108#define AMDGPUSubtarget GCNSubtarget
109#include "AMDGPUGenRegBankGICombiner.inc"
110#undef AMDGPUSubtarget
111#undef GET_GICOMBINER_IMPL
113AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
116 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
118 :
Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
119 RBI(*STI.getRegBankInfo()),
TRI(*STI.getRegisterInfo()),
120 TII(*STI.getInstrInfo()),
121 Helper(Observer,
B,
false, &KB, MDT, LI),
123#include
"AMDGPUGenRegBankGICombiner.inc"
128bool AMDGPURegBankCombinerImpl::isVgprRegBank(
Register Reg)
const {
129 return RBI.getRegBank(Reg,
MRI,
TRI)->getID() == AMDGPU::VGPRRegBankID;
133 if (isVgprRegBank(Reg))
139 if (
Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
144 Register VgprReg =
B.buildCopy(
MRI.getType(Reg), Reg).getReg(0);
145 MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
149AMDGPURegBankCombinerImpl::MinMaxMedOpc
150AMDGPURegBankCombinerImpl::getMinMaxPair(
unsigned Opc)
const {
156 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
159 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
160 case AMDGPU::G_FMAXNUM:
161 case AMDGPU::G_FMINNUM:
162 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
163 case AMDGPU::G_FMAXNUM_IEEE:
164 case AMDGPU::G_FMINNUM_IEEE:
165 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
166 AMDGPU::G_AMDGPU_FMED3};
170template <
class m_Cst,
typename CstTy>
174 CstTy &K0, CstTy &K1)
const {
192bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
195 if (!isVgprRegBank(Dst))
199 LLT Ty =
MRI.getType(Dst);
203 MinMaxMedOpc OpcodeTriple = getMinMaxPair(
MI.getOpcode());
205 std::optional<ValueAndVReg> K0, K1;
207 if (!matchMed<GCstAndRegMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
210 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
212 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
215 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
237bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
240 LLT Ty =
MRI.getType(Dst);
246 auto OpcodeTriple = getMinMaxPair(
MI.getOpcode());
249 std::optional<FPValueAndVReg> K0, K1;
251 if (!matchMed<GFCstAndRegMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
254 if (K0->Value > K1->Value)
266 if ((!
MRI.hasOneNonDBGUse(K0->VReg) ||
TII.isInlineConstant(K0->Value)) &&
267 (!
MRI.hasOneNonDBGUse(K1->VReg) ||
TII.isInlineConstant(K1->Value))) {
268 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
276bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(
MachineInstr &
MI,
279 auto OpcodeTriple = getMinMaxPair(
MI.getOpcode());
281 std::optional<FPValueAndVReg> K0, K1;
283 if (!matchMed<GFCstOrSplatGFCstMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
286 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
293 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(
MI) &&
312bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(
MachineInstr &
MI,
320 if (isFCst(Src0) && !isFCst(Src1))
322 if (isFCst(Src1) && !isFCst(Src2))
324 if (isFCst(Src0) && !isFCst(Src1))
331 auto isOp3Zero = [&]() {
333 if (Op3->
getOpcode() == TargetOpcode::G_FCONSTANT)
342 (getIEEE() && getDX10Clamp() &&
353 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {
MI.getOperand(0)}, {
Reg},
355 MI.eraseFromParent();
359 Med3MatchInfo &MatchInfo)
const {
360 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
361 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
362 getAsVgpr(MatchInfo.Val2)},
364 MI.eraseFromParent();
// Reports the IEEE field of the mode object returned by getMode().
// NOTE(review): getMode() is defined outside this excerpt; presumably it
// yields the function's floating-point mode settings -- confirm.
371bool AMDGPURegBankCombinerImpl::getIEEE()
const {
return getMode().IEEE; }
373bool AMDGPURegBankCombinerImpl::getDX10Clamp()
const {
374 return getMode().DX10Clamp;
377bool AMDGPURegBankCombinerImpl::isFminnumIeee(
const MachineInstr &
MI)
const {
378 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
382 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
385bool AMDGPURegBankCombinerImpl::isClampZeroToOne(
MachineInstr *K0,
387 if (isFCst(K0) && isFCst(K1)) {
403 AMDGPURegBankCombiner(
bool IsOptNone =
false);
413 AMDGPURegBankCombinerImplRuleConfig RuleConfig;
417void AMDGPURegBankCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
430AMDGPURegBankCombiner::AMDGPURegBankCombiner(
bool IsOptNone)
434 if (!RuleConfig.parseCommandLineOption())
440 MachineFunctionProperties::Property::FailedISel))
442 auto *TPC = &getAnalysis<TargetPassConfig>();
448 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
450 const auto *LI =
ST.getLegalizerInfo();
453 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
456 LI, EnableOpt,
F.hasOptSize(),
F.hasMinSize());
457 AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *KB,
nullptr,
458 RuleConfig, ST, MDT, LI);
459 return Impl.combineMachineInstrs();
462char AMDGPURegBankCombiner::ID = 0;
464 "Combine AMDGPU machine instrs after regbankselect",
474 return new AMDGPURegBankCombiner(IsOptNone);
unsigned const MachineRegisterInfo * MRI
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
Combine AMDGPU machine instrs after regbankselect
#define GET_GICOMBINER_CONSTRUCTOR_INITS
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This contains common combine transformations that may be used in a combine pass, or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static StringRef getName(Value *V)
static bool isClampZeroToOne(SDValue A, SDValue B)
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
virtual bool tryCombineAll(MachineInstr &I) const =0
ConstantFP - Floating Point Values [float, double].
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
FunctionPass class - This class is used to implement most global optimizations.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineFunctionProperties & getProperties() const
Get the function properties.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Holds all the information related to register banks.
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
StringRef - Represent a constant reference to a string, i.e.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
operand_type_match m_Reg()
BinaryOpc_match< LHS, RHS, true > m_CommutativeBinOp(unsigned Opcode, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Or< Preds... > m_any_of(Preds &&... preds)
Reg
All possible values of the reg field in the ModR/M byte.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
auto instrs(const MachineBasicBlock &BB)
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.