#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace MIPatternMatch;
// matchFMinFMaxLegacy: record the two values the G_SELECT chooses between.
Info.True = MI.getOperand(2).getReg();
Info.False = MI.getOperand(3).getReg();
// applySelectFCmpToFMinToFMaxLegacy: buildNewInst is a local lambda that emits
// the given opcode with the select's destination register and MI's flags. The
// operand order is chosen per predicate so the legacy min/max NaN behavior
// matches the original select.
buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); // FCMP_ULT/ULE, LHS == True
buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); // FCMP_ULT/ULE, otherwise
buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); // FCMP_OLE/OLT, LHS == True
buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); // FCMP_OLE/OLT, otherwise
buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); // FCMP_UGE/UGT, LHS == True
buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); // FCMP_UGE/UGT, otherwise
buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); // FCMP_OGT/OGE, LHS == True
buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); // FCMP_OGT/OGE, otherwise
MI.eraseFromParent();
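The matching side of this rewrite, matchFMinFMaxLegacy, recognizes a G_SELECT whose condition is a single-use G_FCMP of the same two values being selected. A minimal sketch of that shape, assuming the file's FMinFMaxLegacyInfo struct and the MIPatternMatch namespace are in scope; it is illustrative and simplified, not the exact in-tree matcher (in particular, the real matcher also records the compare predicate into Info.Pred for the rewrite above):

bool matchSelectOfFCmpSketch(MachineInstr &Sel, MachineRegisterInfo &MRI,
                             FMinFMaxLegacyInfo &Info) {
  // %dst = G_SELECT %cond, %true, %false  with  %cond = G_FCMP pred, %lhs, %rhs
  Register Cond = Sel.getOperand(1).getReg();
  // The compare must have no other users, or eliminating it is not a win.
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;
  Info.True = Sel.getOperand(2).getReg();
  Info.False = Sel.getOperand(3).getReg();
  // Only worthwhile when the select chooses between the compared values.
  return (Info.LHS == Info.True && Info.RHS == Info.False) ||
         (Info.LHS == Info.False && Info.RHS == Info.True);
}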
// matchUCharToFloat: only 16-, 32-, or 64-bit integer sources are considered.
assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);

// applyUCharToFloat: emit the byte-0 conversion; when the destination is
// narrower than 32 bits, the s32 result is truncated afterwards.
B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg}, MI.getFlags());
auto Cvt0 =
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg}, MI.getFlags());
MI.eraseFromParent();
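matchUCharToFloat only fires when everything above the low byte of the integer source is already known to be zero, so the single-byte hardware conversion is exact. A minimal sketch of that check, assuming a GISelKnownBits instance is available (the function name is hypothetical; this is illustrative, not the exact in-tree code):

bool sourceFitsInUByteSketch(Register SrcReg, LLT SrcTy, GISelKnownBits &KB) {
  unsigned SrcSize = SrcTy.getSizeInBits();
  assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
  // Bits above the low byte must be known zero for G_AMDGPU_CVT_F32_UBYTE0
  // to produce the same value as the original G_UITOFP.
  const APInt HighMask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
  return KB.maskedValueIsZero(SrcReg, HighMask);
}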
// matchRcpSqrtToRsq: helpers identify an amdgcn_rcp and a G_FSQRT feeding each other.
if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
    MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)

// rcp(sqrt(x)) -> rsq(x)
if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
  MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
    B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0).getReg()}, false)
        .addUse(SqrtSrcMI->getOperand(0).getReg())
        .setMIFlags(MI.getFlags());
  };
  return true;
}

// sqrt(rcp(x)) -> rsq(x)
if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
  MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
    B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0).getReg()}, false)
        .addUse(RcpSrcMI->getOperand(0).getReg())
        .setMIFlags(MI.getFlags());
  };
  return true;
}
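The getRcpSrc/getSqrtSrc helpers each walk one step up the use-def chain. A hedged sketch of the sqrt side using the MIPatternMatch helpers referenced below (assumption: simplified relative to the in-tree lambdas; the function name is hypothetical):

// If MI is itself a G_FSQRT, return the instruction defining its input,
// otherwise nullptr.
MachineInstr *getSqrtSrcSketch(const MachineInstr &MI,
                               const MachineRegisterInfo &MRI) {
  MachineInstr *SqrtSrcMI = nullptr;
  (void)mi_match(MI.getOperand(0).getReg(), MRI,
                 m_GFSqrt(m_MInstr(SqrtSrcMI)));
  return SqrtSrcMI;
}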
// matchCvtF32UByteN: fold a constant shift of the source into the byte index.
const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
unsigned ShiftOffset = 8 * Offset;
if (IsShr)
  ShiftOffset += ShiftAmt;
else
  ShiftOffset -= ShiftAmt;
// Valid only when the selected bits form byte 1, 2, or 3 of the 32-bit source.
return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;

// applyCvtF32UByteN: pick the equivalent UBYTEn opcode and erase the old one.
unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
MI.eraseFromParent();
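As a worked example of the offset arithmetic: G_AMDGPU_CVT_F32_UBYTE1 applied to (x lshr 16) reads bits [24..31] of x, since 8*1 + 16 = 24, so it folds to G_AMDGPU_CVT_F32_UBYTE3 applied directly to x. A standalone helper mirroring that arithmetic (assumption: purely illustrative, not part of the pass):

#include <cassert>

// CvtByte is the N of the original G_AMDGPU_CVT_F32_UBYTEN, ShiftAmt the
// constant shift amount feeding it, IsShr whether the shift was a right shift.
unsigned foldedByteIndex(unsigned CvtByte, unsigned ShiftAmt, bool IsShr) {
  unsigned ShiftOffset = 8 * CvtByte;
  if (IsShr)
    ShiftOffset += ShiftAmt;
  else
    ShiftOffset -= ShiftAmt;
  // Mirrors the legality test above: a whole byte inside the 32-bit source.
  assert(ShiftOffset < 32 && ShiftOffset >= 8 && ShiftOffset % 8 == 0);
  return ShiftOffset / 8; // e.g. foldedByteIndex(1, 16, true) == 3
}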
// matchRemoveFcanonicalize: the canonicalize is redundant when its input is
// already canonical, so just forward the operand.
Reg = MI.getOperand(1).getReg();
// matchCombineSignExtendInReg: the operand must be a subword unsigned buffer load.
return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE ||
       SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;

// applyCombineSignExtendInReg: the load is re-tagged with its signed
// counterpart and takes over the destination of the G_SEXT_INREG, which is
// then erased.
unsigned Opc =
    SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE
        ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE
        : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
Register SignExtendInsnDst = MI.getOperand(0).getReg();
MI.eraseFromParent();
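On the matching side, the G_SEXT_INREG operand has to be the only non-debug user of a subword unsigned buffer load before the load can be re-tagged as signed. A minimal sketch of that check (illustrative; the function name is hypothetical, not the exact in-tree matcher):

bool matchSubwordBufferLoadSketch(MachineInstr &SextInReg,
                                  MachineRegisterInfo &MRI,
                                  MachineInstr *&SubwordBufferLoad) {
  Register Src = SextInReg.getOperand(1).getReg();
  // If the load had other users, changing its opcode would change them too.
  if (!MRI.hasOneNonDBGUse(Src))
    return false;
  SubwordBufferLoad = MRI.getVRegDef(Src);
  unsigned Opc = SubwordBufferLoad->getOpcode();
  return Opc == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
}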
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

  AMDGPUPostLegalizerCombinerInfo(const GCNSubtarget &Subtarget, bool EnableOpt,
                                  bool OptSize, bool MinSize, ...)
      : CombinerInfo(..., LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT), Subtarget(Subtarget) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
// AMDGPUPostLegalizerCombinerInfo::combine: run the generated rules first,
// then handle a few opcodes by hand.
AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
                                               PostLegalizerHelper, Subtarget);

switch (MI.getOpcode()) {
case TargetOpcode::G_SHL:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
  // On some subtargets, a 64-bit shift is a quarter-rate instruction, so
  // splitting it into a move plus a 32-bit shift is usually faster at the
  // same code size.
  return Helper.tryCombineShiftToUnmerge(MI, 32);
}
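The reason these shifts are handled here is that a shift of 32 or more only needs the high 32-bit half of the value, so the 64-bit shift can be rewritten to operate on that half alone. A host-side analogy of the arithmetic (assumption: illustrative C++ only, not MIR):

#include <cstdint>

// (X >> 40) only keeps bits taken from the high 32-bit word of X, so the
// 64-bit shift can become a 32-bit shift of that word; the result's upper
// word is zero.
uint32_t lshr64By40LowWord(uint64_t X) {
  uint32_t Hi = static_cast<uint32_t>(X >> 32);
  return Hi >> (40 - 32);
}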
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

StringRef getPassName() const override { return "AMDGPUPostLegalizerCombiner"; }

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
if (MF.getProperties().hasProperty(
        MachineFunctionProperties::Property::FailedISel))
  return false;
auto *TPC = &getAnalysis<TargetPassConfig>();
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
MachineDominatorTree *MDT =
    IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
AMDGPUPostLegalizerCombinerInfo PCInfo(ST, EnableOpt, F.hasOptSize(),
                                       F.hasMinSize(), LI, KB, MDT);
return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
char AMDGPUPostLegalizerCombiner::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization",
                      false, false)

// createAMDGPUPostLegalizeCombiner():
return new AMDGPUPostLegalizerCombiner(IsOptNone);
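createAMDGPUPostLegalizeCombiner returns an ordinary legacy FunctionPass; the real pipeline wiring lives in the AMDGPU target's pass configuration. A hedged usage sketch (assumption: the helper function and pass-manager setup here are illustrative, not the in-tree pipeline code, and the AMDGPU headers declaring the factory are assumed to be included):

#include "llvm/IR/LegacyPassManager.h"

// Illustrative only: schedule the post-legalizer combiner on a legacy pass
// manager, forwarding IsOptNone so the pass can skip analyses it does not
// need at -O0 (e.g. the machine dominator tree).
void schedulePostLegalizerCombiner(llvm::legacy::PassManager &PM,
                                   bool IsOptNone) {
  PM.add(llvm::createAMDGPUPostLegalizeCombiner(IsOptNone));
}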
This contains common combine transformations that may be used in a combine pass.
This file declares the targeting of the MachineLegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
Combine AMDGPU machine instrs after legalization
SmallVector< MachineOperand, 4 > Cond
Analysis containing CSE Info
This contains common combine transformations that may be used in a combine pass, or by the target elsewhere.
Interface for Targets to specify which operations are combined how and when.
This contains common code to drive combines.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Contains matchers for matching SSA Machine Instructions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options pass.
AMDGPUPostLegalizerCombinerHelperState(AMDGPUCombinerHelper &Helper, AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper, const GCNSubtarget &Subtarget)
AMDGPUCombinerHelper & Helper
AMDGPUPostLegalizerCombinerHelper & PostLegalizerHelper
const GCNSubtarget * Subtarget
const GCNSubtarget & Subtarget
void applyCombineSignExtendInReg(MachineInstr &MI, MachineInstr *&MatchInfo)
AMDGPUCombinerHelper & Helper
bool matchUCharToFloat(MachineInstr &MI)
void applyUCharToFloat(MachineInstr &MI)
bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg)
bool matchCombineSignExtendInReg(MachineInstr &MI, MachineInstr *&MatchInfo)
bool matchRcpSqrtToRsq(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo)
MachineRegisterInfo & MRI
void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI, const FMinFMaxLegacyInfo &Info)
void applyCvtF32UByteN(MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo)
bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info)
AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, AMDGPUCombinerHelper &Helper)
This class provides the information for the target register banks.
Class for arbitrary precision integers.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
GISelKnownBits * getKnownBits() const
virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const =0
Attempt to combine instructions using MI as the root.
FunctionPass class - This class is used to implement most global optimizations.
Abstract class that contains various methods for clients to notify about changes.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
bool maskedValueIsZero(Register Val, const APInt &Mask)
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects)
Build and insert either a G_INTRINSIC (if HasSideEffects is false) or G_INTRINSIC_W_SIDE_EFFECTS inst...
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth=5) const
StringRef - Represent a constant reference to a string, i.e.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
virtual const TargetLowering * getTargetLowering() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
operand_type_match m_Reg()
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_FSQRT > m_GFSqrt(const SrcTy &Src)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
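A minimal usage sketch for the MIPatternMatch helpers listed above (assumption: illustrative; the function name is hypothetical): match a zero-extended logical shift right by a constant and capture both the shifted value and the amount.

bool isZextOfLshrByConstSketch(Register Dst, const MachineRegisterInfo &MRI,
                               Register &Src, APInt &Amt) {
  using namespace MIPatternMatch;
  return mi_match(Dst, MRI, m_GZExt(m_GLShr(m_Reg(Src), m_ICst(Amt))));
}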
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
auto instrs(const MachineBasicBlock &BB)