LLVM 20.0.0git
AMDGPUPreLegalizerCombiner.cpp
Go to the documentation of this file.
//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// before the legalizer.
//
//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
35
36using namespace llvm;
37using namespace MIPatternMatch;
38namespace {
39
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenPreLegalizeGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
44class AMDGPUPreLegalizerCombinerImpl : public Combiner {
45protected:
46 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
47 const GCNSubtarget &STI;
48 const AMDGPUCombinerHelper Helper;
49
50public:
51 AMDGPUPreLegalizerCombinerImpl(
52 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
53 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
54 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
55 const GCNSubtarget &STI, MachineDominatorTree *MDT,
56 const LegalizerInfo *LI);
57
58 static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }
59
60 bool tryCombineAllImpl(MachineInstr &MI) const;
61 bool tryCombineAll(MachineInstr &I) const override;
62
63 struct ClampI64ToI16MatchInfo {
64 int64_t Cmp1 = 0;
65 int64_t Cmp2 = 0;
66 Register Origin;
67 };
68
69 bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
70 const MachineFunction &MF,
71 ClampI64ToI16MatchInfo &MatchInfo) const;
72
73 void applyClampI64ToI16(MachineInstr &MI,
74 const ClampI64ToI16MatchInfo &MatchInfo) const;
75
76private:
77#define GET_GICOMBINER_CLASS_MEMBERS
78#define AMDGPUSubtarget GCNSubtarget
79#include "AMDGPUGenPreLegalizeGICombiner.inc"
80#undef GET_GICOMBINER_CLASS_MEMBERS
81#undef AMDGPUSubtarget
82};
83
84#define GET_GICOMBINER_IMPL
85#define AMDGPUSubtarget GCNSubtarget
86#include "AMDGPUGenPreLegalizeGICombiner.inc"
87#undef AMDGPUSubtarget
88#undef GET_GICOMBINER_IMPL
89
90AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
91 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
92 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
93 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
94 const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
95 : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
96 Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI, STI),
98#include "AMDGPUGenPreLegalizeGICombiner.inc"
100{
101}
102
103bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
104 if (tryCombineAllImpl(MI))
105 return true;
106
107 switch (MI.getOpcode()) {
108 case TargetOpcode::G_SHUFFLE_VECTOR:
109 return Helper.tryCombineShuffleVector(MI);
110 }
111
112 return false;
113}
114
115bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
117 ClampI64ToI16MatchInfo &MatchInfo) const {
118 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
119
120 // Try to find a pattern where an i64 value should get clamped to short.
121 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
122 if (SrcType != LLT::scalar(64))
123 return false;
124
125 const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
126 if (DstType != LLT::scalar(16))
127 return false;
128
130
131 auto IsApplicableForCombine = [&MatchInfo]() -> bool {
132 const auto Cmp1 = MatchInfo.Cmp1;
133 const auto Cmp2 = MatchInfo.Cmp2;
134 const auto Diff = std::abs(Cmp2 - Cmp1);
135
136 // If the difference between both comparison values is 0 or 1, there is no
137 // need to clamp.
138 if (Diff == 0 || Diff == 1)
139 return false;
140
141 const int64_t Min = std::numeric_limits<int16_t>::min();
142 const int64_t Max = std::numeric_limits<int16_t>::max();
143
144 // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
145 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
146 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
147 };
148
149 // Try to match a combination of min / max MIR opcodes.
150 if (mi_match(MI.getOperand(1).getReg(), MRI,
151 m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
152 if (mi_match(Base, MRI,
153 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
154 return IsApplicableForCombine();
155 }
156 }
157
158 if (mi_match(MI.getOperand(1).getReg(), MRI,
159 m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
160 if (mi_match(Base, MRI,
161 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
162 return IsApplicableForCombine();
163 }
164 }
165
166 return false;
167}
168
169// We want to find a combination of instructions that
170// gets generated when an i64 gets clamped to i16.
171// The corresponding pattern is:
172// G_MAX / G_MAX for i16 <= G_TRUNC i64.
173// This can be efficiently written as following:
174// v_cvt_pk_i16_i32 v0, v0, v1
175// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
176void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
177 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
178
179 Register Src = MatchInfo.Origin;
180 assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
181 LLT::scalar(64));
182 const LLT S32 = LLT::scalar(32);
183
184 auto Unmerge = B.buildUnmerge(S32, Src);
185
186 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
187
188 const LLT V2S16 = LLT::fixed_vector(2, 16);
189 auto CvtPk =
190 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
191 {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());
192
193 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
194 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
195 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
196 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
197
198 auto Bitcast = B.buildBitcast({S32}, CvtPk);
199
200 auto Med3 = B.buildInstr(
201 AMDGPU::G_AMDGPU_SMED3, {S32},
202 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
203 MI.getFlags());
204
205 B.buildTrunc(MI.getOperand(0).getReg(), Med3);
206
207 MI.eraseFromParent();
208}
209
210// Pass boilerplate
211// ================
212
213class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
214public:
215 static char ID;
216
217 AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
218
219 StringRef getPassName() const override {
220 return "AMDGPUPreLegalizerCombiner";
221 }
222
223 bool runOnMachineFunction(MachineFunction &MF) override;
224
225 void getAnalysisUsage(AnalysisUsage &AU) const override;
226
227private:
228 bool IsOptNone;
229 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
230};
231} // end anonymous namespace
232
233void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
235 AU.setPreservesCFG();
239 if (!IsOptNone) {
242 }
243
247}
248
249AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
250 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
252
253 if (!RuleConfig.parseCommandLineOption())
254 report_fatal_error("Invalid rule identifier");
255}
256
257bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
258 if (MF.getProperties().hasProperty(
259 MachineFunctionProperties::Property::FailedISel))
260 return false;
261 auto *TPC = &getAnalysis<TargetPassConfig>();
262 const Function &F = MF.getFunction();
263 bool EnableOpt =
264 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
265 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
266
267 // Enable CSE.
269 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
270 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
271
272 const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
274 IsOptNone ? nullptr
275 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
276 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
277 nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
278 // Disable fixed-point iteration to reduce compile-time
279 CInfo.MaxIterations = 1;
280 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
281 // This is the first Combiner, so the input IR might contain dead
282 // instructions.
283 CInfo.EnableFullDCE = true;
284 AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
285 STI, MDT, STI.getLegalizerInfo());
286 return Impl.combineMachineInstrs();
287}
288
289char AMDGPUPreLegalizerCombiner::ID = 0;
290INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
291 "Combine AMDGPU machine instrs before legalization",
292 false, false)
295INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
296 "Combine AMDGPU machine instrs before legalization", false,
297 false)
298
299namespace llvm {
301 return new AMDGPUPreLegalizerCombiner(IsOptNone);
302}
303} // end namespace llvm
unsigned const MachineRegisterInfo * MRI
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
This contains common combine transformations that may be used in a combine pass.
static const LLT V2S16
static const LLT S32
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
#define GET_GICOMBINER_CONSTRUCTOR_INITS
Combine AMDGPU machine instrs before legalization
#define DEBUG_TYPE
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Hexagon Vector Combine
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
Combiner implementation.
Definition: Combiner.h:34
virtual bool tryCombineAll(MachineInstr &I) const =0
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:309
The actual analysis pass wrapper.
Definition: CSEInfo.h:225
Simple wrapper that does the following.
Definition: CSEInfo.h:207
The CSE Analysis object.
Definition: CSEInfo.h:70
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Bitcast
Perform the operation on a different, but equivalently sized type.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:1168
auto instrs(const MachineBasicBlock &BB)