27#include "llvm/IR/IntrinsicsAMDGPU.h"
29#define DEBUG_TYPE "amdgpu-regbank-combiner"
32using namespace MIPatternMatch;
66 template <
class m_Cst,
typename CstTy>
68 Register &Val, CstTy &K0, CstTy &K1);
114 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
117 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
118 case AMDGPU::G_FMAXNUM:
119 case AMDGPU::G_FMINNUM:
120 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
121 case AMDGPU::G_FMAXNUM_IEEE:
122 case AMDGPU::G_FMINNUM_IEEE:
123 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
124 AMDGPU::G_AMDGPU_FMED3};
128template <
class m_Cst,
typename CstTy>
132 CstTy &K0, CstTy &K1) {
164 std::optional<ValueAndVReg> K0, K1;
166 if (!matchMed<GCstAndRegMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
169 if (OpcodeTriple.
Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
171 if (OpcodeTriple.
Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
174 MatchInfo = {OpcodeTriple.
Med, Val, K0->VReg, K1->VReg};
209 std::optional<FPValueAndVReg> K0, K1;
211 if (!matchMed<GFCstAndRegMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
214 if (K0->Value > K1->Value)
228 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
241 std::optional<FPValueAndVReg> K0, K1;
243 if (!matchMed<GFCstOrSplatGFCstMatch>(
MI,
MRI, OpcodeTriple, Val, K0, K1))
246 if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
253 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(
MI) &&
280 if (isFCst(Src0) && !isFCst(Src1))
282 if (isFCst(Src1) && !isFCst(Src2))
284 if (isFCst(Src0) && !isFCst(Src1))
286 if (!isClampZeroToOne(Src1, Src2))
291 auto isOp3Zero = [&]() {
293 if (Op3->
getOpcode() == TargetOpcode::G_FCONSTANT)
302 (getIEEE() && getDX10Clamp() &&
313 B.
buildInstr(AMDGPU::G_AMDGPU_CLAMP, {
MI.getOperand(0)}, {Reg},
315 MI.eraseFromParent();
322 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
323 getAsVgpr(MatchInfo.Val2)},
325 MI.eraseFromParent();
/// Returns the IEEE field of the mode object produced by getMode().
/// The clamp-matching conditions in this file test it together with
/// getDX10Clamp().
332bool AMDGPURegBankCombinerHelper::getIEEE() {
return getMode().
IEEE; }
/// Returns the DX10Clamp field of the mode object produced by getMode().
/// The clamp-matching conditions in this file test it together with
/// getIEEE().
334bool AMDGPURegBankCombinerHelper::getDX10Clamp() {
return getMode().
DX10Clamp; }
336bool AMDGPURegBankCombinerHelper::isFminnumIeee(
const MachineInstr &
MI) {
337 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
341 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
344bool AMDGPURegBankCombinerHelper::isClampZeroToOne(
MachineInstr *K0,
346 if (isFCst(K0) && isFCst(K1)) {
366#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
367#include "AMDGPUGenRegBankGICombiner.inc"
368#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
371#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
372#include "AMDGPUGenRegBankGICombiner.inc"
373#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
375class AMDGPURegBankCombinerInfo final :
public CombinerInfo {
380 AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
382 AMDGPURegBankCombinerInfo(
bool EnableOpt,
bool OptSize,
bool MinSize,
386 LI, EnableOpt, OptSize, MinSize),
388 if (!GeneratedRuleCfg.parseCommandLineOption())
401 AMDGPUGenRegBankCombinerHelper
Generated(GeneratedRuleCfg, Helper,
410#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
411#include "AMDGPUGenRegBankGICombiner.inc"
412#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
421 AMDGPURegBankCombiner(
bool IsOptNone =
false);
424 return "AMDGPURegBankCombiner";
435void AMDGPURegBankCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
448AMDGPURegBankCombiner::AMDGPURegBankCombiner(
bool IsOptNone)
455 MachineFunctionProperties::Property::FailedISel))
457 auto *TPC = &getAnalysis<TargetPassConfig>();
466 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
468 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
469 AMDGPURegBankCombinerInfo PCInfo(EnableOpt,
F.hasOptSize(),
470 F.hasMinSize(), LI, KB, MDT);
472 return C.combineMachineInstrs(MF,
nullptr);
475char AMDGPURegBankCombiner::ID = 0;
477 "Combine AMDGPU machine instrs after regbankselect",
487 return new AMDGPURegBankCombiner(IsOptNone);
unsigned const MachineRegisterInfo * MRI
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
Combine AMDGPU machine instrs after regbankselect
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This contains common combine transformations that may be used in a combine pass, or by the target else...
Interface for Targets to specify which operations are combined how and when.
This contains common code to drive combines.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Contains matchers for matching SSA Machine Instructions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options pass.
AMDGPURegBankCombinerHelper & RegBankHelper
AMDGPURegBankCombinerHelperState(CombinerHelper &Helper, AMDGPURegBankCombinerHelper &RegBankHelper)
MachineRegisterInfo & MRI
const RegisterBankInfo & RBI
Register getAsVgpr(Register Reg)
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc, Register &Val, CstTy &K0, CstTy &K1)
bool isVgprRegBank(Register Reg)
bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
void applyClamp(MachineInstr &MI, Register &Reg)
const TargetRegisterInfo & TRI
AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
MinMaxMedOpc getMinMaxPair(unsigned Opc)
const GCNSubtarget & Subtarget
bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg)
bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg)
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
This class provides the information for the target register banks.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff it does not:
virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const =0
Attempt to combine instructions using MI as the root.
ConstantFP - Floating Point Values [float, double].
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
FunctionPass class - This class is used to implement most global optimizations.
Abstract class that contains various methods for clients to notify about changes.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Holds all the information related to register banks.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
bool isInlineConstant(const APInt &Imm) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
StringRef - Represent a constant reference to a string, i.e.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
operand_type_match m_Reg()
BinaryOpc_match< LHS, RHS, true > m_CommutativeBinOp(unsigned Opcode, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Or< Preds... > m_any_of(Preds &&... preds)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
bool isKnownNeverNaN(const Value *V, const DataLayout &DL, const TargetLibraryInfo *TLI, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
auto instrs(const MachineBasicBlock &BB)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...