30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
37using namespace MIPatternMatch;
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenPreLegalizeGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
44class AMDGPUPreLegalizerCombinerImpl :
public Combiner {
46 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
52 AMDGPUPreLegalizerCombinerImpl(
55 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
59 static const char *
getName() {
return "AMDGPUPreLegalizerCombinerImpl"; }
64 struct ClampI64ToI16MatchInfo {
72 ClampI64ToI16MatchInfo &MatchInfo)
const;
75 const ClampI64ToI16MatchInfo &MatchInfo)
const;
78#define GET_GICOMBINER_CLASS_MEMBERS
79#define AMDGPUSubtarget GCNSubtarget
80#include "AMDGPUGenPreLegalizeGICombiner.inc"
81#undef GET_GICOMBINER_CLASS_MEMBERS
85#define GET_GICOMBINER_IMPL
86#define AMDGPUSubtarget GCNSubtarget
87#include "AMDGPUGenPreLegalizeGICombiner.inc"
89#undef GET_GICOMBINER_IMPL
91AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
94 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
96 :
Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
97 Helper(Observer,
B,
true, &KB, MDT, LI),
99#include
"AMDGPUGenPreLegalizeGICombiner.inc"
104bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(
MachineInstr &
MI)
const {
105 if (tryCombineAllImpl(
MI))
108 switch (
MI.getOpcode()) {
109 case TargetOpcode::G_CONCAT_VECTORS:
110 return Helper.tryCombineConcatVectors(
MI);
111 case TargetOpcode::G_SHUFFLE_VECTOR:
112 return Helper.tryCombineShuffleVector(
MI);
118bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
120 ClampI64ToI16MatchInfo &MatchInfo)
const {
121 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC &&
"Invalid instruction!");
124 const LLT SrcType =
MRI.getType(
MI.getOperand(1).getReg());
128 const LLT DstType =
MRI.getType(
MI.getOperand(0).getReg());
134 auto IsApplicableForCombine = [&MatchInfo]() ->
bool {
135 const auto Cmp1 = MatchInfo.Cmp1;
136 const auto Cmp2 = MatchInfo.Cmp2;
137 const auto Diff = std::abs(Cmp2 - Cmp1);
141 if (Diff == 0 || Diff == 1)
144 const int64_t Min = std::numeric_limits<int16_t>::min();
145 const int64_t
Max = std::numeric_limits<int16_t>::max();
148 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
149 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
157 return IsApplicableForCombine();
165 return IsApplicableForCombine();
179void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
180 MachineInstr &
MI,
const ClampI64ToI16MatchInfo &MatchInfo)
const {
183 assert(
MI.getParent()->getParent()->getRegInfo().getType(Src) ==
187 B.setInstrAndDebugLoc(
MI);
189 auto Unmerge =
B.buildUnmerge(S32, Src);
191 assert(
MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
195 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
196 {Unmerge.getReg(0), Unmerge.getReg(1)},
MI.getFlags());
198 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
199 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
200 auto MinBoundaryDst =
B.buildConstant(S32, MinBoundary);
201 auto MaxBoundaryDst =
B.buildConstant(S32, MaxBoundary);
203 auto Bitcast =
B.buildBitcast({S32}, CvtPk);
205 auto Med3 =
B.buildInstr(
206 AMDGPU::G_AMDGPU_SMED3, {S32},
207 {MinBoundaryDst.getReg(0),
Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
210 B.buildTrunc(
MI.getOperand(0).getReg(), Med3);
212 MI.eraseFromParent();
222 AMDGPUPreLegalizerCombiner(
bool IsOptNone =
false);
225 return "AMDGPUPreLegalizerCombiner";
234 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
238void AMDGPUPreLegalizerCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
254AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(
bool IsOptNone)
258 if (!RuleConfig.parseCommandLineOption())
262bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(
MachineFunction &MF) {
264 MachineFunctionProperties::Property::FailedISel))
266 auto *TPC = &getAnalysis<TargetPassConfig>();
270 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
274 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
275 auto *CSEInfo = &
Wrapper.get(TPC->getCSEConfig());
279 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
281 nullptr, EnableOpt,
F.hasOptSize(),
F.hasMinSize());
282 AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
284 return Impl.combineMachineInstrs();
287char AMDGPUPreLegalizerCombiner::ID = 0;
289 "Combine AMDGPU machine instrs before legalization",
299 return new AMDGPUPreLegalizerCombiner(IsOptNone);
unsigned const MachineRegisterInfo * MRI
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
This contains common combine transformations that may be used in a combine pass.
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
#define GET_GICOMBINER_CONSTRUCTOR_INITS
Combine AMDGPU machine instrs before legalization
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Contains matchers for matching SSA Machine Instructions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
virtual bool tryCombineAll(MachineInstr &I) const =0
FunctionPass class - This class is used to implement most global optimizations.
const LegalizerInfo * getLegalizerInfo() const override
The actual analysis pass wrapper.
Simple wrapper that does the following.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineFunctionProperties & getProperties() const
Get the function properties.
Representation of each machine instruction.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
StringRef - Represent a constant reference to a string, i.e.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
operand_type_match m_Reg()
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, false > m_GSMax(const LHS &L, const RHS &R)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, false > m_GSMin(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
auto instrs(const MachineBasicBlock &BB)