LLVM 20.0.0git
AMDGPUPreLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
35
36using namespace llvm;
37using namespace MIPatternMatch;
38namespace {
39
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenPreLegalizeGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
44class AMDGPUPreLegalizerCombinerImpl : public Combiner {
45protected:
46 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
47 const GCNSubtarget &STI;
48 // TODO: Make CombinerHelper methods const.
49 mutable AMDGPUCombinerHelper Helper;
50
51public:
52 AMDGPUPreLegalizerCombinerImpl(
53 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
54 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
55 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
56 const GCNSubtarget &STI, MachineDominatorTree *MDT,
57 const LegalizerInfo *LI);
58
59 static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }
60
61 bool tryCombineAllImpl(MachineInstr &MI) const;
62 bool tryCombineAll(MachineInstr &I) const override;
63
64 struct ClampI64ToI16MatchInfo {
65 int64_t Cmp1 = 0;
66 int64_t Cmp2 = 0;
67 Register Origin;
68 };
69
70 bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
71 const MachineFunction &MF,
72 ClampI64ToI16MatchInfo &MatchInfo) const;
73
74 void applyClampI64ToI16(MachineInstr &MI,
75 const ClampI64ToI16MatchInfo &MatchInfo) const;
76
77private:
78#define GET_GICOMBINER_CLASS_MEMBERS
79#define AMDGPUSubtarget GCNSubtarget
80#include "AMDGPUGenPreLegalizeGICombiner.inc"
81#undef GET_GICOMBINER_CLASS_MEMBERS
82#undef AMDGPUSubtarget
83};
84
85#define GET_GICOMBINER_IMPL
86#define AMDGPUSubtarget GCNSubtarget
87#include "AMDGPUGenPreLegalizeGICombiner.inc"
88#undef AMDGPUSubtarget
89#undef GET_GICOMBINER_IMPL
90
91AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
92 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
93 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
94 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
95 const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
96 : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
97 Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
99#include "AMDGPUGenPreLegalizeGICombiner.inc"
101{
102}
103
104bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
105 if (tryCombineAllImpl(MI))
106 return true;
107
108 switch (MI.getOpcode()) {
109 case TargetOpcode::G_SHUFFLE_VECTOR:
110 return Helper.tryCombineShuffleVector(MI);
111 }
112
113 return false;
114}
115
116bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
118 ClampI64ToI16MatchInfo &MatchInfo) const {
119 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
120
121 // Try to find a pattern where an i64 value should get clamped to short.
122 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
123 if (SrcType != LLT::scalar(64))
124 return false;
125
126 const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
127 if (DstType != LLT::scalar(16))
128 return false;
129
131
132 auto IsApplicableForCombine = [&MatchInfo]() -> bool {
133 const auto Cmp1 = MatchInfo.Cmp1;
134 const auto Cmp2 = MatchInfo.Cmp2;
135 const auto Diff = std::abs(Cmp2 - Cmp1);
136
137 // If the difference between both comparison values is 0 or 1, there is no
138 // need to clamp.
139 if (Diff == 0 || Diff == 1)
140 return false;
141
142 const int64_t Min = std::numeric_limits<int16_t>::min();
143 const int64_t Max = std::numeric_limits<int16_t>::max();
144
145 // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
146 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
147 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
148 };
149
150 // Try to match a combination of min / max MIR opcodes.
151 if (mi_match(MI.getOperand(1).getReg(), MRI,
152 m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
153 if (mi_match(Base, MRI,
154 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
155 return IsApplicableForCombine();
156 }
157 }
158
159 if (mi_match(MI.getOperand(1).getReg(), MRI,
160 m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
161 if (mi_match(Base, MRI,
162 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
163 return IsApplicableForCombine();
164 }
165 }
166
167 return false;
168}
169
170// We want to find a combination of instructions that
171// gets generated when an i64 gets clamped to i16.
172// The corresponding pattern is:
173// G_MAX / G_MAX for i16 <= G_TRUNC i64.
174// This can be efficiently written as following:
175// v_cvt_pk_i16_i32 v0, v0, v1
176// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
177void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
178 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
179
180 Register Src = MatchInfo.Origin;
181 assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
182 LLT::scalar(64));
183 const LLT S32 = LLT::scalar(32);
184
185 auto Unmerge = B.buildUnmerge(S32, Src);
186
187 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
188
189 const LLT V2S16 = LLT::fixed_vector(2, 16);
190 auto CvtPk =
191 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
192 {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());
193
194 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
195 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
196 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
197 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
198
199 auto Bitcast = B.buildBitcast({S32}, CvtPk);
200
201 auto Med3 = B.buildInstr(
202 AMDGPU::G_AMDGPU_SMED3, {S32},
203 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
204 MI.getFlags());
205
206 B.buildTrunc(MI.getOperand(0).getReg(), Med3);
207
208 MI.eraseFromParent();
209}
210
211// Pass boilerplate
212// ================
213
214class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
215public:
216 static char ID;
217
218 AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
219
220 StringRef getPassName() const override {
221 return "AMDGPUPreLegalizerCombiner";
222 }
223
224 bool runOnMachineFunction(MachineFunction &MF) override;
225
226 void getAnalysisUsage(AnalysisUsage &AU) const override;
227
228private:
229 bool IsOptNone;
230 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
231};
232} // end anonymous namespace
233
234void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
236 AU.setPreservesCFG();
240 if (!IsOptNone) {
243 }
244
248}
249
250AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
251 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
253
254 if (!RuleConfig.parseCommandLineOption())
255 report_fatal_error("Invalid rule identifier");
256}
257
258bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
259 if (MF.getProperties().hasProperty(
260 MachineFunctionProperties::Property::FailedISel))
261 return false;
262 auto *TPC = &getAnalysis<TargetPassConfig>();
263 const Function &F = MF.getFunction();
264 bool EnableOpt =
265 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
266 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
267
268 // Enable CSE.
270 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
271 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
272
273 const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
275 IsOptNone ? nullptr
276 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
277 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
278 nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
279 // Disable fixed-point iteration to reduce compile-time
280 CInfo.MaxIterations = 1;
281 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
282 // This is the first Combiner, so the input IR might contain dead
283 // instructions.
284 CInfo.EnableFullDCE = true;
285 AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
286 STI, MDT, STI.getLegalizerInfo());
287 return Impl.combineMachineInstrs();
288}
289
290char AMDGPUPreLegalizerCombiner::ID = 0;
291INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
292 "Combine AMDGPU machine instrs before legalization",
293 false, false)
296INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
297 "Combine AMDGPU machine instrs before legalization", false,
298 false)
299
300namespace llvm {
302 return new AMDGPUPreLegalizerCombiner(IsOptNone);
303}
304} // end namespace llvm
unsigned const MachineRegisterInfo * MRI
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
This contains common combine transformations that may be used in a combine pass.
static const LLT V2S16
static const LLT S32
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
#define GET_GICOMBINER_CONSTRUCTOR_INITS
Combine AMDGPU machine instrs before legalization
#define DEBUG_TYPE
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Hexagon Vector Combine
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
Combiner implementation.
Definition: Combiner.h:34
virtual bool tryCombineAll(MachineInstr &I) const =0
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:309
The actual analysis pass wrapper.
Definition: CSEInfo.h:225
Simple wrapper that does the following.
Definition: CSEInfo.h:207
The CSE Analysis object.
Definition: CSEInfo.h:70
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Bitcast
Perform the operation on a different, but equivalently sized type.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:1153
auto instrs(const MachineBasicBlock &BB)