30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenPreLegalizeGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
// Combiner implementation for the AMDGPU pre-legalizer combine step; it
// derives from Combiner.
44class AMDGPUPreLegalizerCombinerImpl :
public Combiner {
// Rule configuration, held by const reference.
46 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
// Constructor declaration; the out-of-class definition stores RuleConfig and
// forwards MF, CInfo, &VT and CSEInfo to the Combiner base.
51 AMDGPUPreLegalizerCombinerImpl(
54 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
// Returns the fixed name string of this implementation class.
58 static const char *
getName() {
return "AMDGPUPreLegalizerCombinerImpl"; }
// Data handed from the clamp matcher to the clamp rewrite; Cmp1 and Cmp2 are
// the members read by the out-of-class definitions in this file.
63 struct ClampI64ToI16MatchInfo {
// Const member taking the match info by mutable reference — presumably the
// declaration of matchClampI64ToI16 (TODO confirm against the full file).
71 ClampI64ToI16MatchInfo &MatchInfo)
const;
// Const member taking the match info by const reference — presumably the
// declaration of applyClampI64ToI16 (TODO confirm against the full file).
74 const ClampI64ToI16MatchInfo &MatchInfo)
const;
// Pull the class members in from the generated .inc file. AMDGPUSubtarget is
// defined as GCNSubtarget for the included code.
77#define GET_GICOMBINER_CLASS_MEMBERS
78#define AMDGPUSubtarget GCNSubtarget
79#include "AMDGPUGenPreLegalizeGICombiner.inc"
80#undef GET_GICOMBINER_CLASS_MEMBERS
84#define GET_GICOMBINER_IMPL
85#define AMDGPUSubtarget GCNSubtarget
86#include "AMDGPUGenPreLegalizeGICombiner.inc"
88#undef GET_GICOMBINER_IMPL
// Constructor definition. Forwards MF, CInfo, &VT and CSEInfo to the Combiner
// base, stores RuleConfig and STI, and builds Helper from Observer, B, a
// literal `true`, &VT, MDT, LI and STI. The trailing #include sits inside the
// initializer list, so it presumably supplies further generated initializers.
// NOTE(review): the `true` passed to Helper is presumably its pre-legalize
// flag — confirm against the helper's constructor.
90AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
93 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
95 :
Combiner(MF, CInfo, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
96 Helper(Observer,
B,
true, &VT, MDT, LI, STI),
98#include
"AMDGPUGenPreLegalizeGICombiner.inc"
// Attempts the combines on MI. The visible part of the body begins by
// consulting tryCombineAllImpl(MI), which is presumably provided by the
// generated .inc file.
// NOTE(review): what happens on either outcome of that call is not visible
// in this listing — confirm against the full file.
103bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(
MachineInstr &
MI)
const {
104 if (tryCombineAllImpl(
MI))
// Matcher for the clamp combine rooted at a G_TRUNC instruction. It reads the
// low-level types of MI's operand 1 (source) and operand 0 (destination) and,
// on the two visible success paths, returns the verdict of
// IsApplicableForCombine(), which inspects the constants stored in MatchInfo.
// NOTE(review): the type checks and pattern matches between the visible
// statements are missing from this listing.
109bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
110 MachineInstr &
MI,
const MachineRegisterInfo &MRI,
const MachineFunction &MF,
111 ClampI64ToI16MatchInfo &MatchInfo)
const {
// The combine is only meant to be invoked on G_TRUNC.
112 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC &&
"Invalid instruction!");
// Type of the value being truncated (operand 1).
115 const LLT SrcType = MRI.
getType(
MI.getOperand(1).getReg());
// Type of the truncation result (operand 0).
119 const LLT DstType = MRI.
getType(
MI.getOperand(0).getReg());
// Decides, from the two constants recorded in MatchInfo, whether the combine
// should fire. MatchInfo is captured by reference, so the lambda sees the
// values present at the time it is called.
125 auto IsApplicableForCombine = [&MatchInfo]() ->
bool {
126 const auto Cmp1 = MatchInfo.Cmp1;
127 const auto Cmp2 = MatchInfo.Cmp2;
// NOTE(review): the subtraction is evaluated before std::abs. If Cmp1/Cmp2
// are signed 64-bit values (their declarations are not visible here), two
// widely separated constants could overflow — confirm and consider a
// checked or unsigned difference.
128 const auto Diff = std::abs(Cmp2 - Cmp1);
// Special case for equal or adjacent constants; the statement guarded by this
// `if` is not visible in this listing.
132 if (Diff == 0 || Diff == 1)
// int16_t limits, widened to int64_t for the comparisons below.
135 const int64_t Min = std::numeric_limits<int16_t>::min();
136 const int64_t
Max = std::numeric_limits<int16_t>::max();
// True when both constants lie inside [Min, Max], whichever of the two is the
// smaller one.
139 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
140 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
// Two success paths, each deferring to the range check above; the conditions
// leading to them are not visible in this listing.
148 return IsApplicableForCombine();
156 return IsApplicableForCombine();
// Rewrite for a matched clamp. Visible steps: unmerge Src into S32 pieces,
// build G_AMDGPU_CVT_PK_I16_I32 from the first two pieces, build
// G_AMDGPU_SMED3 over (min boundary, Bitcast, max boundary), truncate the
// result into MI's destination register, then erase MI.
// NOTE(review): the definitions of Src, S32, V2S16 and Bitcast are not visible
// in this listing — confirm against the full file.
170void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
171 MachineInstr &
MI,
const ClampI64ToI16MatchInfo &MatchInfo)
const {
// Split the source value into S32-typed pieces.
177 auto Unmerge =
B.buildUnmerge(
S32, Src);
// MI must not itself be the pack instruction created below.
179 assert(
MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
// Pack pieces 0 and 1 into a V2S16 result, carrying over MI's flags.
183 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {
V2S16},
184 {Unmerge.getReg(0), Unmerge.getReg(1)},
MI.getFlags());
// Order the two matched constants so the smaller is the lower bound and the
// larger the upper bound, regardless of which field held which.
186 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
187 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
// Materialize both bounds as S32 constants.
188 auto MinBoundaryDst =
B.buildConstant(
S32, MinBoundary);
189 auto MaxBoundaryDst =
B.buildConstant(
S32, MaxBoundary);
// G_AMDGPU_SMED3 over (lower bound, Bitcast, upper bound), producing an S32.
193 auto Med3 =
B.buildInstr(
194 AMDGPU::G_AMDGPU_SMED3, {
S32},
195 {MinBoundaryDst.getReg(0),
Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
// Truncate the result into MI's original destination register.
198 B.buildTrunc(
MI.getOperand(0).getReg(), Med3);
// The original instruction has been replaced; remove it.
200 MI.eraseFromParent();
// MachineFunctionPass that runs AMDGPUPreLegalizerCombinerImpl over a machine
// function (see runOnMachineFunction's definition in this file).
206class AMDGPUPreLegalizerCombiner :
public MachineFunctionPass {
// IsOptNone defaults to false.
210 AMDGPUPreLegalizerCombiner(
bool IsOptNone =
false);
// Returns the fixed pass name string.
212 StringRef getPassName()
const override {
213 return "AMDGPUPreLegalizerCombiner";
// Runs the combiner on MF; returns the result of the combiner run.
216 bool runOnMachineFunction(MachineFunction &MF)
override;
// Declares the analyses this pass uses.
218 void getAnalysisUsage(AnalysisUsage &AU)
const override;
// Rule configuration owned by the pass; parsed from the command line in the
// constructor and handed to the Impl object on each run.
222 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
// Registers this pass's analysis dependencies on AU. The only requirement
// visible in this listing is GISelValueTrackingAnalysisLegacy.
// NOTE(review): other addRequired/addPreserved calls are not visible here —
// confirm against the full file.
226void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU)
const {
230 AU.
addRequired<GISelValueTrackingAnalysisLegacy>();
// Constructor: initializes the MachineFunctionPass base with ID, records
// IsOptNone, and parses the rule configuration from the command line.
// NOTE(review): the statement executed when parseCommandLineOption() returns
// false is not visible in this listing.
242AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(
bool IsOptNone)
243 : MachineFunctionPass(
ID), IsOptNone(IsOptNone) {
244 if (!RuleConfig.parseCommandLineOption())
// Pass entry point. Gathers the analyses the combiner needs, configures a
// combiner-info object, constructs AMDGPUPreLegalizerCombinerImpl and returns
// the result of its combineMachineInstrs() call.
// NOTE(review): several statements (early exits, the declarations of VT,
// Wrapper, MDT, CInfo, EnableOpt, F and STI) are missing from this listing.
248bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(
MachineFunction &MF) {
// Target pass configuration; used below for the CSE configuration.
251 auto *TPC = &getAnalysis<TargetPassConfig>();
// Value-tracking analysis result for this function.
256 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
// CSE wrapper, from which the CSE info for the configured CSE settings is
// obtained.
260 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
261 auto *CSEInfo = &
Wrapper.get(TPC->getCSEConfig());
// Second arm of a conditional expression: the machine dominator tree.
266 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
// Trailing constructor arguments of the combiner-info object: a null pointer,
// the optimization flag, and the function's opt-size / min-size attributes.
268 nullptr, EnableOpt,
F.hasOptSize(),
F.hasMinSize());
// Limit the combiner to one iteration.
270 CInfo.MaxIterations = 1;
// Turn on the full-DCE option of the combiner info.
274 CInfo.EnableFullDCE =
true;
275 AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, *VT, CSEInfo, RuleConfig, STI,
277 return Impl.combineMachineInstrs();
// Definition of the pass's static ID member, initialized to 0.
280char AMDGPUPreLegalizerCombiner::ID = 0;
// Human-readable pass description. It appears twice, presumably once in each
// of the pass-registration BEGIN/END macro invocations (TODO confirm; the
// macro names are not visible on these lines).
282 "Combine AMDGPU machine instrs before legalization",
287 "Combine AMDGPU machine instrs before legalization",
false,
// Allocates a new pass instance, forwarding IsOptNone — presumably the body of
// the createAMDGPUPreLegalizeCombiner factory (TODO confirm).
291 return new AMDGPUPreLegalizerCombiner(IsOptNone);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
This contains common combine transformations that may be used in a combine pass.
This file declares the targeting of the MachineLegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass, or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Contains matchers for matching SSA Machine Instructions.
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static StringRef getName(Value *V)
Target-Independent Code Generator Pass Configuration Options pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
const LegalizerInfo * getLegalizerInfo() const override
Simple wrapper that does the following.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Wrapper class representing virtual and physical registers.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...