LLVM 18.0.0git
AMDGPUPreLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
35
36using namespace llvm;
37using namespace MIPatternMatch;
38namespace {
39
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenPreLegalizeGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
44class AMDGPUPreLegalizerCombinerImpl : public Combiner {
45protected:
46 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
47 const GCNSubtarget &STI;
48 // TODO: Make CombinerHelper methods const.
49 mutable AMDGPUCombinerHelper Helper;
50
51public:
52 AMDGPUPreLegalizerCombinerImpl(
53 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
54 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
55 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
56 const GCNSubtarget &STI, MachineDominatorTree *MDT,
57 const LegalizerInfo *LI);
58
59 static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }
60
61 bool tryCombineAllImpl(MachineInstr &MI) const;
62 bool tryCombineAll(MachineInstr &I) const override;
63
64 struct ClampI64ToI16MatchInfo {
65 int64_t Cmp1 = 0;
66 int64_t Cmp2 = 0;
67 Register Origin;
68 };
69
70 bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
71 const MachineFunction &MF,
72 ClampI64ToI16MatchInfo &MatchInfo) const;
73
74 void applyClampI64ToI16(MachineInstr &MI,
75 const ClampI64ToI16MatchInfo &MatchInfo) const;
76
77private:
78#define GET_GICOMBINER_CLASS_MEMBERS
79#define AMDGPUSubtarget GCNSubtarget
80#include "AMDGPUGenPreLegalizeGICombiner.inc"
81#undef GET_GICOMBINER_CLASS_MEMBERS
82#undef AMDGPUSubtarget
83};
84
85#define GET_GICOMBINER_IMPL
86#define AMDGPUSubtarget GCNSubtarget
87#include "AMDGPUGenPreLegalizeGICombiner.inc"
88#undef AMDGPUSubtarget
89#undef GET_GICOMBINER_IMPL
90
91AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
92 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
93 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
94 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
95 const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
96 : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
97 Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
99#include "AMDGPUGenPreLegalizeGICombiner.inc"
101{
102}
103
104bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
105 if (tryCombineAllImpl(MI))
106 return true;
107
108 switch (MI.getOpcode()) {
109 case TargetOpcode::G_CONCAT_VECTORS:
110 return Helper.tryCombineConcatVectors(MI);
111 case TargetOpcode::G_SHUFFLE_VECTOR:
112 return Helper.tryCombineShuffleVector(MI);
113 }
114
115 return false;
116}
117
118bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
120 ClampI64ToI16MatchInfo &MatchInfo) const {
121 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
122
123 // Try to find a pattern where an i64 value should get clamped to short.
124 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
125 if (SrcType != LLT::scalar(64))
126 return false;
127
128 const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
129 if (DstType != LLT::scalar(16))
130 return false;
131
133
134 auto IsApplicableForCombine = [&MatchInfo]() -> bool {
135 const auto Cmp1 = MatchInfo.Cmp1;
136 const auto Cmp2 = MatchInfo.Cmp2;
137 const auto Diff = std::abs(Cmp2 - Cmp1);
138
139 // If the difference between both comparison values is 0 or 1, there is no
140 // need to clamp.
141 if (Diff == 0 || Diff == 1)
142 return false;
143
144 const int64_t Min = std::numeric_limits<int16_t>::min();
145 const int64_t Max = std::numeric_limits<int16_t>::max();
146
147 // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
148 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
149 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
150 };
151
152 // Try to match a combination of min / max MIR opcodes.
153 if (mi_match(MI.getOperand(1).getReg(), MRI,
154 m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
155 if (mi_match(Base, MRI,
156 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
157 return IsApplicableForCombine();
158 }
159 }
160
161 if (mi_match(MI.getOperand(1).getReg(), MRI,
162 m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
163 if (mi_match(Base, MRI,
164 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
165 return IsApplicableForCombine();
166 }
167 }
168
169 return false;
170}
171
172// We want to find a combination of instructions that
173// gets generated when an i64 gets clamped to i16.
174// The corresponding pattern is:
175// G_MAX / G_MAX for i16 <= G_TRUNC i64.
176// This can be efficiently written as following:
177// v_cvt_pk_i16_i32 v0, v0, v1
178// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
179void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
180 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
181
182 Register Src = MatchInfo.Origin;
183 assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
184 LLT::scalar(64));
185 const LLT S32 = LLT::scalar(32);
186
187 B.setInstrAndDebugLoc(MI);
188
189 auto Unmerge = B.buildUnmerge(S32, Src);
190
191 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
192
193 const LLT V2S16 = LLT::fixed_vector(2, 16);
194 auto CvtPk =
195 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
196 {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());
197
198 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
199 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
200 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
201 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
202
203 auto Bitcast = B.buildBitcast({S32}, CvtPk);
204
205 auto Med3 = B.buildInstr(
206 AMDGPU::G_AMDGPU_SMED3, {S32},
207 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
208 MI.getFlags());
209
210 B.buildTrunc(MI.getOperand(0).getReg(), Med3);
211
212 MI.eraseFromParent();
213}
214
215// Pass boilerplate
216// ================
217
218class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
219public:
220 static char ID;
221
222 AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
223
224 StringRef getPassName() const override {
225 return "AMDGPUPreLegalizerCombiner";
226 }
227
228 bool runOnMachineFunction(MachineFunction &MF) override;
229
230 void getAnalysisUsage(AnalysisUsage &AU) const override;
231
232private:
233 bool IsOptNone;
234 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
235};
236} // end anonymous namespace
237
238void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
240 AU.setPreservesCFG();
244 if (!IsOptNone) {
247 }
248
252}
253
254AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
255 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
257
258 if (!RuleConfig.parseCommandLineOption())
259 report_fatal_error("Invalid rule identifier");
260}
261
262bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
263 if (MF.getProperties().hasProperty(
264 MachineFunctionProperties::Property::FailedISel))
265 return false;
266 auto *TPC = &getAnalysis<TargetPassConfig>();
267 const Function &F = MF.getFunction();
268 bool EnableOpt =
269 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
270 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
271
272 // Enable CSE.
274 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
275 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
276
277 const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
279 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
280 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
281 nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
282 AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
283 STI, MDT, STI.getLegalizerInfo());
284 return Impl.combineMachineInstrs();
285}
286
287char AMDGPUPreLegalizerCombiner::ID = 0;
288INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
289 "Combine AMDGPU machine instrs before legalization",
290 false, false)
293INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
294 "Combine AMDGPU machine instrs before legalization", false,
295 false)
296
297namespace llvm {
299 return new AMDGPUPreLegalizerCombiner(IsOptNone);
300}
301} // end namespace llvm
unsigned const MachineRegisterInfo * MRI
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
This contains common combine transformations that may be used in a combine pass.
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
#define GET_GICOMBINER_CONSTRUCTOR_INITS
Combine AMDGPU machine instrs before legalization
#define DEBUG_TYPE
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Hexagon Vector Combine
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
Combiner implementation.
Definition: Combiner.h:34
virtual bool tryCombineAll(MachineInstr &I) const =0
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:265
The actual analysis pass wrapper.
Definition: CSEInfo.h:222
Simple wrapper that does the following.
Definition: CSEInfo.h:204
The CSE Analysis object.
Definition: CSEInfo.h:69
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:92
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineFunctionProperties & getProperties() const
Get the function properties.
Representation of each machine instruction.
Definition: MachineInstr.h:68
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Bitcast
Perform the operation on a different, but equivalently sized type.
operand_type_match m_Reg()
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, false > m_GSMax(const LHS &L, const RHS &R)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, false > m_GSMin(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:922
auto instrs(const MachineBasicBlock &BB)