LLVM 17.0.0git
AMDGPUPreLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
28
29#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
30
31using namespace llvm;
32using namespace MIPatternMatch;
33
35protected:
40
41public:
44 : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
45
47 int64_t Cmp1 = 0;
48 int64_t Cmp2 = 0;
50 };
51
54 ClampI64ToI16MatchInfo &MatchInfo);
55
57 const ClampI64ToI16MatchInfo &MatchInfo);
58};
59
62 ClampI64ToI16MatchInfo &MatchInfo) {
63 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
64
65 // Try to find a pattern where an i64 value should get clamped to short.
66 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
67 if (SrcType != LLT::scalar(64))
68 return false;
69
70 const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
71 if (DstType != LLT::scalar(16))
72 return false;
73
75
// Decides whether the two matched clamp constants describe a range that is
// worth combining. Reads MatchInfo.Cmp1 / MatchInfo.Cmp2 through the captured
// reference, so it is only meaningful after a matcher has filled them in.
// Returns true only when the bounds differ by more than 1 and both lie inside
// the signed 16-bit range; the order of the two constants does not matter.
// NOTE(review): the subtraction below is evaluated before the int16 range
// check. Cmp1 / Cmp2 appear to be int64_t, so extreme 64-bit constants could
// overflow here -- confirm, and consider doing the range check first.
76 auto IsApplicableForCombine = [&MatchInfo]() -> bool {
77 const auto Cmp1 = MatchInfo.Cmp1;
78 const auto Cmp2 = MatchInfo.Cmp2;
// Absolute distance between the two bounds, independent of their order.
79 const auto Diff = std::abs(Cmp2 - Cmp1);
80
81 // If the difference between both comparison values is 0 or 1, there is no
82 // need to clamp.
83 if (Diff == 0 || Diff == 1)
84 return false;
85
// Limits of int16_t, widened to int64_t for the comparisons below.
86 const int64_t Min = std::numeric_limits<int16_t>::min();
87 const int64_t Max = std::numeric_limits<int16_t>::max();
88
89 // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
// First alternative handles Cmp1 as the lower bound, second handles Cmp2 as
// the lower bound.
90 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
91 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
92 };
93
94 // Try to match a combination of min / max MIR opcodes.
95 if (mi_match(MI.getOperand(1).getReg(), MRI,
96 m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
97 if (mi_match(Base, MRI,
98 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
99 return IsApplicableForCombine();
100 }
101 }
102
103 if (mi_match(MI.getOperand(1).getReg(), MRI,
104 m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
105 if (mi_match(Base, MRI,
106 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
107 return IsApplicableForCombine();
108 }
109 }
110
111 return false;
112}
113
114// We want to find a combination of instructions that
115// gets generated when an i64 gets clamped to i16.
116// The corresponding pattern is:
117// G_MAX / G_MIN for i16 <= G_TRUNC i64.
118// This can be efficiently written as following:
119// v_cvt_pk_i16_i32 v0, v0, v1
120// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
122 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) {
123
124 Register Src = MatchInfo.Origin;
125 assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
126 LLT::scalar(64));
127 const LLT S32 = LLT::scalar(32);
128
130
131 auto Unmerge = B.buildUnmerge(S32, Src);
132
133 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
134
135 const LLT V2S16 = LLT::fixed_vector(2, 16);
136 auto CvtPk =
137 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
138 {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());
139
140 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
141 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
142 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
143 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
144
145 auto Bitcast = B.buildBitcast({S32}, CvtPk);
146
147 auto Med3 = B.buildInstr(
148 AMDGPU::G_AMDGPU_SMED3, {S32},
149 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
150 MI.getFlags());
151
152 B.buildTrunc(MI.getOperand(0).getReg(), Med3);
153
154 MI.eraseFromParent();
155}
156
158protected:
161
162public:
167};
168
169#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
170#include "AMDGPUGenPreLegalizeGICombiner.inc"
171#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
172
173namespace {
174#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
175#include "AMDGPUGenPreLegalizeGICombiner.inc"
176#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
177
178class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo {
179 GISelKnownBits *KB;
181
182public:
183 AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
184
185 AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
187 : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
188 /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
189 KB(KB), MDT(MDT) {
190 if (!GeneratedRuleCfg.parseCommandLineOption())
191 report_fatal_error("Invalid rule identifier");
192 }
193
195 MachineIRBuilder &B) const override;
196};
197
198bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
200 MachineIRBuilder &B) const {
201 const auto *LI = MI.getMF()->getSubtarget().getLegalizerInfo();
202 AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ true, KB, MDT, LI);
203 AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper);
204 AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
205 PreLegalizerHelper);
206
207 if (Generated.tryCombineAll(Observer, MI, B))
208 return true;
209
210 switch (MI.getOpcode()) {
211 case TargetOpcode::G_CONCAT_VECTORS:
212 return Helper.tryCombineConcatVectors(MI);
213 case TargetOpcode::G_SHUFFLE_VECTOR:
214 return Helper.tryCombineShuffleVector(MI);
215 }
216
217 return false;
218}
219
220#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
221#include "AMDGPUGenPreLegalizeGICombiner.inc"
222#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
223
224// Pass boilerplate
225// ================
226
// MachineFunctionPass wrapper that runs the AMDGPU pre-legalizer combiner on a
// machine function. Only getPassName() is defined inline; the constructor,
// runOnMachineFunction() and getAnalysisUsage() are defined out of line.
227class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
228public:
// Pass identification member; defined (as 0) after the class.
229 static char ID;
230
// IsOptNone is stored in the member of the same name. When it is set,
// runOnMachineFunction() passes a null MachineDominatorTree to the combiner
// info instead of requesting the analysis.
231 AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
232
// Returns the fixed, human-readable name of this pass.
233 StringRef getPassName() const override {
234 return "AMDGPUPreLegalizerCombiner";
235 }
236
// Entry point invoked per machine function; returns whether the combiner
// changed anything.
237 bool runOnMachineFunction(MachineFunction &MF) override;
238
// Declares the analyses this pass requires and preserves.
239 void getAnalysisUsage(AnalysisUsage &AU) const override;
240private:
// Construction-time flag; see the constructor comment for its effect.
241 bool IsOptNone;
242};
243} // end anonymous namespace
244
245void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
247 AU.setPreservesCFG();
251 if (!IsOptNone) {
254 }
255
259}
260
261AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
262 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
264}
265
266bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
267 if (MF.getProperties().hasProperty(
268 MachineFunctionProperties::Property::FailedISel))
269 return false;
270 auto *TPC = &getAnalysis<TargetPassConfig>();
271 const Function &F = MF.getFunction();
272 bool EnableOpt =
273 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
274 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
276 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
277 AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
278 F.hasMinSize(), KB, MDT);
279 // Enable CSE.
281 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
282 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
283
284 Combiner C(PCInfo, TPC);
285 return C.combineMachineInstrs(MF, CSEInfo);
286}
287
288char AMDGPUPreLegalizerCombiner::ID = 0;
289INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
290 "Combine AMDGPU machine instrs before legalization",
291 false, false)
294INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
295 "Combine AMDGPU machine instrs before legalization", false,
296 false)
297
298namespace llvm {
300 return new AMDGPUPreLegalizerCombiner(IsOptNone);
301}
302} // end namespace llvm
unsigned const MachineRegisterInfo * MRI
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
This contains common combine transformations that may be used in a combine pass.
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
Combine AMDGPU machine instrs before legalization
#define DEBUG_TYPE
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass,or by the target else...
Interface for Targets to specify which operations are combined how and when.
This contains common code to drive combines.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Hexagon Vector Combine
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
Contains matchers for matching SSA Machine Instructions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
AMDGPUPreLegalizerCombinerHelper & PreLegalizerHelper
AMDGPUPreLegalizerCombinerHelperState(AMDGPUCombinerHelper &Helper, AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, ClampI64ToI16MatchInfo &MatchInfo)
AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, AMDGPUCombinerHelper &Helper)
void applyClampI64ToI16(MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const =0
Attempt to combine instructions using MI as the root.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
The actual analysis pass wrapper.
Definition: CSEInfo.h:222
Simple wrapper that does the following.
Definition: CSEInfo.h:204
Abstract class that contains various methods for clients to notify about changes.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:76
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Representation of each machine instruction.
Definition: MachineInstr.h:68
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
operand_type_match m_Reg()
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, false > m_GSMax(const LHS &L, const RHS &R)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, false > m_GSMin(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:892
auto instrs(const MachineBasicBlock &BB)