29#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
32using namespace MIPatternMatch;
63 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC &&
"Invalid instruction!");
76 auto IsApplicableForCombine = [&MatchInfo]() ->
bool {
77 const auto Cmp1 = MatchInfo.
Cmp1;
78 const auto Cmp2 = MatchInfo.
Cmp2;
79 const auto Diff = std::abs(Cmp2 - Cmp1);
83 if (Diff == 0 || Diff == 1)
86 const int64_t Min = std::numeric_limits<int16_t>::min();
87 const int64_t Max = std::numeric_limits<int16_t>::max();
90 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
91 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
99 return IsApplicableForCombine();
107 return IsApplicableForCombine();
125 assert(
MI.getParent()->getParent()->getRegInfo().getType(Src) ==
133 assert(
MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
137 B.
buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
138 {Unmerge.getReg(0), Unmerge.getReg(1)},
MI.getFlags());
140 auto MinBoundary = std::min(MatchInfo.
Cmp1, MatchInfo.
Cmp2);
141 auto MaxBoundary = std::max(MatchInfo.
Cmp1, MatchInfo.
Cmp2);
148 AMDGPU::G_AMDGPU_SMED3, {S32},
149 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
154 MI.eraseFromParent();
169#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
170#include "AMDGPUGenPreLegalizeGICombiner.inc"
171#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
174#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
175#include "AMDGPUGenPreLegalizeGICombiner.inc"
176#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
178class AMDGPUPreLegalizerCombinerInfo final :
public CombinerInfo {
183 AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
185 AMDGPUPreLegalizerCombinerInfo(
bool EnableOpt,
bool OptSize,
bool MinSize,
188 nullptr, EnableOpt, OptSize, MinSize),
190 if (!GeneratedRuleCfg.parseCommandLineOption())
201 const auto *LI =
MI.getMF()->getSubtarget().getLegalizerInfo();
204 AMDGPUGenPreLegalizerCombinerHelper
Generated(GeneratedRuleCfg, Helper,
210 switch (
MI.getOpcode()) {
211 case TargetOpcode::G_CONCAT_VECTORS:
212 return Helper.tryCombineConcatVectors(
MI);
213 case TargetOpcode::G_SHUFFLE_VECTOR:
214 return Helper.tryCombineShuffleVector(
MI);
220#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
221#include "AMDGPUGenPreLegalizeGICombiner.inc"
222#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
231 AMDGPUPreLegalizerCombiner(
bool IsOptNone =
false);
234 return "AMDGPUPreLegalizerCombiner";
245void AMDGPUPreLegalizerCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
261AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(
bool IsOptNone)
266bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(
MachineFunction &MF) {
268 MachineFunctionProperties::Property::FailedISel))
270 auto *TPC = &getAnalysis<TargetPassConfig>();
274 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
276 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
277 AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt,
F.hasOptSize(),
278 F.hasMinSize(), KB, MDT);
281 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
282 auto *CSEInfo = &
Wrapper.get(TPC->getCSEConfig());
285 return C.combineMachineInstrs(MF, CSEInfo);
288char AMDGPUPreLegalizerCombiner::ID = 0;
290 "Combine AMDGPU machine instrs before legalization",
300 return new AMDGPUPreLegalizerCombiner(IsOptNone);
unsigned const MachineRegisterInfo * MRI
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
This contains common combine transformations that may be used in a combine pass.
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
Combine AMDGPU machine instrs before legalization
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass, or by the target else...
Interface for Targets to specify which operations are combined how and when.
This contains common code to drive combines.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Contains matchers for matching SSA Machine Instructions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
AMDGPUCombinerHelper & Helper
AMDGPUPreLegalizerCombinerHelper & PreLegalizerHelper
AMDGPUPreLegalizerCombinerHelperState(AMDGPUCombinerHelper &Helper, AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, ClampI64ToI16MatchInfo &MatchInfo)
AMDGPUCombinerHelper & Helper
MachineRegisterInfo & MRI
AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, AMDGPUCombinerHelper &Helper)
void applyClampI64ToI16(MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const =0
Attempt to combine instructions using MI as the root.
FunctionPass class - This class is used to implement most global optimizations.
The actual analysis pass wrapper.
Simple wrapper that does the following.
Abstract class that contains various methods for clients to notify about changes.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Representation of each machine instruction.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
StringRef - Represent a constant reference to a string, i.e.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
operand_type_match m_Reg()
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, false > m_GSMax(const LHS &L, const RHS &R)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, false > m_GSMin(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
auto instrs(const MachineBasicBlock &BB)