LLVM 23.0.0git
AMDGPUPreLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
35
36using namespace llvm;
37using namespace MIPatternMatch;
38namespace {
39
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenPreLegalizeGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
44class AMDGPUPreLegalizerCombinerImpl : public Combiner {
45protected:
46 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
47 const GCNSubtarget &STI;
48 const AMDGPUCombinerHelper Helper;
49
50public:
51 AMDGPUPreLegalizerCombinerImpl(
53 GISelCSEInfo *CSEInfo,
54 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
55 const GCNSubtarget &STI, MachineDominatorTree *MDT,
56 const LegalizerInfo *LI);
57
58 static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }
59
60 bool tryCombineAllImpl(MachineInstr &MI) const;
61 bool tryCombineAll(MachineInstr &I) const override;
62
63 struct ClampI64ToI16MatchInfo {
64 int64_t Cmp1 = 0;
65 int64_t Cmp2 = 0;
66 Register Origin;
67 };
68
69 bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
70 const MachineFunction &MF,
71 ClampI64ToI16MatchInfo &MatchInfo) const;
72
73 void applyClampI64ToI16(MachineInstr &MI,
74 const ClampI64ToI16MatchInfo &MatchInfo) const;
75
76private:
77#define GET_GICOMBINER_CLASS_MEMBERS
78#define AMDGPUSubtarget GCNSubtarget
79#include "AMDGPUGenPreLegalizeGICombiner.inc"
80#undef GET_GICOMBINER_CLASS_MEMBERS
81#undef AMDGPUSubtarget
82};
83
84#define GET_GICOMBINER_IMPL
85#define AMDGPUSubtarget GCNSubtarget
86#include "AMDGPUGenPreLegalizeGICombiner.inc"
87#undef AMDGPUSubtarget
88#undef GET_GICOMBINER_IMPL
89
90AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
92 GISelCSEInfo *CSEInfo,
93 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
94 const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
95 : Combiner(MF, CInfo, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
96 Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI, STI),
98#include "AMDGPUGenPreLegalizeGICombiner.inc"
100{
101}
102
103bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
104 if (tryCombineAllImpl(MI))
105 return true;
106 return false;
107}
108
109bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
110 MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
111 ClampI64ToI16MatchInfo &MatchInfo) const {
112 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
113
114 // Try to find a pattern where an i64 value should get clamped to short.
115 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
116 if (SrcType != LLT::scalar(64))
117 return false;
118
119 const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
120 if (DstType != LLT::scalar(16))
121 return false;
122
124
125 auto IsApplicableForCombine = [&MatchInfo]() -> bool {
126 const auto Cmp1 = MatchInfo.Cmp1;
127 const auto Cmp2 = MatchInfo.Cmp2;
128 const auto Diff = std::abs(Cmp2 - Cmp1);
129
130 // If the difference between both comparison values is 0 or 1, there is no
131 // need to clamp.
132 if (Diff == 0 || Diff == 1)
133 return false;
134
135 const int64_t Min = std::numeric_limits<int16_t>::min();
136 const int64_t Max = std::numeric_limits<int16_t>::max();
137
138 // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
139 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
140 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
141 };
142
143 // Try to match a combination of min / max MIR opcodes.
144 if (mi_match(MI.getOperand(1).getReg(), MRI,
145 m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
146 if (mi_match(Base, MRI,
147 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
148 return IsApplicableForCombine();
149 }
150 }
151
152 if (mi_match(MI.getOperand(1).getReg(), MRI,
153 m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
154 if (mi_match(Base, MRI,
155 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
156 return IsApplicableForCombine();
157 }
158 }
159
160 return false;
161}
162
163// We want to find a combination of instructions that
164// gets generated when an i64 gets clamped to i16.
165// The corresponding pattern is:
166// G_MAX / G_MAX for i16 <= G_TRUNC i64.
167// This can be efficiently written as following:
168// v_cvt_pk_i16_i32 v0, v0, v1
169// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
170void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
171 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
172
173 Register Src = MatchInfo.Origin;
174 assert(MI.getMF()->getRegInfo().getType(Src) == LLT::scalar(64));
175 const LLT S32 = LLT::scalar(32);
176
177 auto Unmerge = B.buildUnmerge(S32, Src);
178
179 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
180
181 const LLT V2S16 = LLT::fixed_vector(2, 16);
182 auto CvtPk =
183 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
184 {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());
185
186 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
187 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
188 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
189 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
190
191 auto Bitcast = B.buildBitcast({S32}, CvtPk);
192
193 auto Med3 = B.buildInstr(
194 AMDGPU::G_AMDGPU_SMED3, {S32},
195 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
196 MI.getFlags());
197
198 B.buildTrunc(MI.getOperand(0).getReg(), Med3);
199
200 MI.eraseFromParent();
201}
202
203// Pass boilerplate
204// ================
205
206class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
207public:
208 static char ID;
209
210 AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
211
212 StringRef getPassName() const override {
213 return "AMDGPUPreLegalizerCombiner";
214 }
215
216 bool runOnMachineFunction(MachineFunction &MF) override;
217
218 void getAnalysisUsage(AnalysisUsage &AU) const override;
219
220private:
221 bool IsOptNone;
222 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
223};
224} // end anonymous namespace
225
226void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
227 AU.addRequired<TargetPassConfig>();
228 AU.setPreservesCFG();
230 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
231 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
232 if (!IsOptNone) {
233 AU.addRequired<MachineDominatorTreeWrapperPass>();
234 AU.addPreserved<MachineDominatorTreeWrapperPass>();
235 }
236
237 AU.addRequired<GISelCSEAnalysisWrapperPass>();
238 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
240}
241
242AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
243 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
244 if (!RuleConfig.parseCommandLineOption())
245 report_fatal_error("Invalid rule identifier");
246}
247
248bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
249 if (MF.getProperties().hasFailedISel())
250 return false;
251 auto *TPC = &getAnalysis<TargetPassConfig>();
252 const Function &F = MF.getFunction();
253 bool EnableOpt =
254 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
256 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
257
258 // Enable CSE.
260 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
261 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
262
263 const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
265 IsOptNone ? nullptr
266 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
267 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
268 nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
269 // Disable fixed-point iteration to reduce compile-time
270 CInfo.MaxIterations = 1;
271 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
272 // This is the first Combiner, so the input IR might contain dead
273 // instructions.
274 CInfo.EnableFullDCE = true;
275 AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, *VT, CSEInfo, RuleConfig, STI,
276 MDT, STI.getLegalizerInfo());
277 return Impl.combineMachineInstrs();
278}
279
280char AMDGPUPreLegalizerCombiner::ID = 0;
281INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
282 "Combine AMDGPU machine instrs before legalization",
283 false, false)
286INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
287 "Combine AMDGPU machine instrs before legalization", false,
288 false)
289
291 return new AMDGPUPreLegalizerCombiner(IsOptNone);
292}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
This contains common combine transformations that may be used in a combine pass.
constexpr LLT V2S16
constexpr LLT S32
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass, or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static StringRef getName(Value *V)
Target-Independent Code Generator Pass Configuration Options pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Combiner implementation.
Definition Combiner.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const LegalizerInfo * getLegalizerInfo() const override
Simple wrapper that does the following.
Definition CSEInfo.h:212
The CSE Analysis object.
Definition CSEInfo.h:72
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Bitcast
Perform the operation on a different, but equivalently sized type.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1147
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...