Go to the documentation of this file.
26 #include "llvm/IR/IntrinsicsAMDGPU.h"
29 #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
32 using namespace MIPatternMatch;
44 :
B(
B), MF(
B.getMF()),
MRI(*
B.getMRI()), Helper(Helper){};
96 Info.True =
MI.getOperand(2).getReg();
97 Info.False =
MI.getOperand(3).getReg();
120 B.setInstrAndDebugLoc(
MI);
122 B.buildInstr(Opc, {
MI.getOperand(0)}, {
X,
Y},
MI.getFlags());
129 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY,
Info.RHS,
Info.LHS);
131 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY,
Info.LHS,
Info.RHS);
139 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY,
Info.LHS,
Info.RHS);
141 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY,
Info.RHS,
Info.LHS);
147 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY,
Info.RHS,
Info.LHS);
149 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY,
Info.LHS,
Info.RHS);
155 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY,
Info.LHS,
Info.RHS);
157 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY,
Info.RHS,
Info.LHS);
164 MI.eraseFromParent();
178 assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
187 B.setInstrAndDebugLoc(
MI);
196 SrcReg =
B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
199 B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
200 {SrcReg},
MI.getFlags());
202 auto Cvt0 =
B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
203 {SrcReg},
MI.getFlags());
204 B.buildFPTrunc(DstReg, Cvt0,
MI.getFlags());
207 MI.eraseFromParent();
215 if (
MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
216 MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
230 if ((RcpSrcMI = getRcpSrc(
MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
232 B.buildIntrinsic(Intrinsic::amdgcn_rsq, {
MI.getOperand(0)},
false)
233 .addUse(SqrtSrcMI->getOperand(0).getReg())
234 .setMIFlags(
MI.getFlags());
240 if ((SqrtSrcMI = getSqrtSrc(
MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
242 B.buildIntrinsic(Intrinsic::amdgcn_rsq, {
MI.getOperand(0)},
false)
244 .setMIFlags(
MI.getFlags());
263 const unsigned Offset =
MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
265 unsigned ShiftOffset = 8 * Offset;
267 ShiftOffset += ShiftAmt;
269 ShiftOffset -= ShiftAmt;
273 return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
282 B.setInstrAndDebugLoc(
MI);
283 unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.
ShiftOffset / 8;
290 CvtSrc =
B.buildAnyExt(S32, CvtSrc).getReg(0);
294 B.buildInstr(NewOpc, {
MI.getOperand(0)}, {CvtSrc},
MI.getFlags());
295 MI.eraseFromParent();
302 Reg =
MI.getOperand(1).getReg();
315 : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
318 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
319 #include "AMDGPUGenPostLegalizeGICombiner.inc"
320 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
323 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
324 #include "AMDGPUGenPostLegalizeGICombiner.inc"
325 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
327 class AMDGPUPostLegalizerCombinerInfo final :
public CombinerInfo {
332 AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
334 AMDGPUPostLegalizerCombinerInfo(
bool EnableOpt,
bool OptSize,
bool MinSize,
338 LI, EnableOpt, OptSize, MinSize),
340 if (!GeneratedRuleCfg.parseCommandLineOption())
353 AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
354 PostLegalizerHelper);
356 if (Generated.tryCombineAll(Observer,
MI,
B))
359 switch (
MI.getOpcode()) {
360 case TargetOpcode::G_SHL:
361 case TargetOpcode::G_LSHR:
362 case TargetOpcode::G_ASHR:
372 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
373 #include "AMDGPUGenPostLegalizeGICombiner.inc"
374 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
383 AMDGPUPostLegalizerCombiner(
bool IsOptNone =
false);
386 return "AMDGPUPostLegalizerCombiner";
397 void AMDGPUPostLegalizerCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
410 AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(
bool IsOptNone)
415 bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(
MachineFunction &MF) {
417 MachineFunctionProperties::Property::FailedISel))
419 auto *TPC = &getAnalysis<TargetPassConfig>();
428 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
430 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
431 AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt,
F.hasOptSize(),
432 F.hasMinSize(), LI, KB, MDT);
434 return C.combineMachineInstrs(MF,
nullptr);
439 "Combine AMDGPU machine instrs after legalization",
449 return new AMDGPUPostLegalizerCombiner(IsOptNone);
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
bool hasProperty(Property P) const
This is an optimization pass for GlobalISel generic memory operations.
bool maskedValueIsZero(Register Val, const APInt &Mask)
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
operand_type_match m_Reg()
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
bool matchRcpSqrtToRsq(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
GISelKnownBits * getKnownBits() const
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Reg
All possible values of the reg field in the ModR/M byte.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
AMDGPUPostLegalizerCombinerHelperState(AMDGPUCombinerHelper &Helper, AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
This class provides the information for the target register banks.
bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg)
@ FCMP_ULT
1 1 0 0 True if unordered or less than
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, "Combine AMDGPU machine instrs after legalization", false, false) INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner
AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, AMDGPUCombinerHelper &Helper)
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis>().get(MF);
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
void applyUCharToFloat(MachineInstr &MI)
(vector float) vec_cmpeq(*A, *B) C
const MachineOperand & getOperand(unsigned i) const
TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
Represent the analysis usage information of a pass.
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
const MachineFunctionProperties & getProperties() const
Get the function properties.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
void applyCvtF32UByteN(MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
AMDGPUPostLegalizerCombinerHelper & PostLegalizerHelper
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Analysis containing CSE Info
@ FCMP_OLT
0 1 0 0 True if ordered and less than
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found. This assumes there is a single definition of the register.
bind_ty< CmpInst::Predicate > m_Pred(CmpInst::Predicate &P)
AMDGPUCombinerHelper & Helper
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool matchUCharToFloat(MachineInstr &MI)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Target-Independent Code Generator Pass Configuration Options.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Helper class to build MachineInstr.
Representation of each machine instruction.
bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
print Print MemDeps of function
Register getReg() const
getReg - Returns the register number.
bool hasFminFmaxLegacy() const
Class for arbitrary precision integers.
SmallVector< MachineOperand, 4 > Cond
void setPreservesCFG()
This function should be called by the pass, iff they do not:
StringRef - Represent a constant reference to a string, i.e.
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI, const FMinFMaxLegacyInfo &Info)
Abstract class that contains various methods for clients to notify about changes.
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Function & getFunction()
Return the LLVM function that this machine code represents.
ConstantMatch< APInt > m_ICst(APInt &Cst)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
UnaryOp_match< SrcTy, TargetOpcode::G_FSQRT > m_GFSqrt(const SrcTy &Src)
Combine AMDGPU machine instrs after legalization
AMDGPUCombinerHelper & Helper
MachineRegisterInfo & MRI
virtual const TargetLowering * getTargetLowering() const
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
FunctionPass class - This class is used to implement most global optimizations.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
AnalysisUsage & addRequired()
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth=5) const
@ FCMP_UEQ
1 0 0 1 True if unordered or equal