Bug Summary

File: build/source/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
Warning: line 265, column 8
Value stored to 'IsShr' during its initialization is never read
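
The checker behind this report is deadcode.DeadStores (enabled via -analyzer-checker=deadcode in the invocation below). It flags values that are stored to a variable and then overwritten or abandoned before any read. A minimal, hypothetical reproduction of the pattern, unrelated to the LLVM code itself (compute and recompute are stand-in declarations for illustration only):

int compute();
int recompute();

int demo() {
  int x = compute();  // dead store: this initial value is never read ...
  x = recompute();    // ... because x is unconditionally overwritten here
  return x;
}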

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AMDGPUPostLegalizerCombiner.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AMDGPU -I /build/source/llvm/lib/Target/AMDGPU -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
1//=== lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// after the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPUCombinerHelper.h"
16#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19#include "llvm/CodeGen/GlobalISel/Combiner.h"
20#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
21#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
22#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
23#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
24#include "llvm/CodeGen/MachineDominators.h"
25#include "llvm/CodeGen/TargetPassConfig.h"
26#include "llvm/IR/IntrinsicsAMDGPU.h"
27#include "llvm/Target/TargetMachine.h"
28
29#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
30
31using namespace llvm;
32using namespace MIPatternMatch;
33
34class AMDGPUPostLegalizerCombinerHelper {
35protected:
36 MachineIRBuilder &B;
37 MachineFunction &MF;
38 MachineRegisterInfo &MRI;
39 const GCNSubtarget &Subtarget;
40 const SIInstrInfo &TII;
41 AMDGPUCombinerHelper &Helper;
42
43public:
44 AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B,
45 AMDGPUCombinerHelper &Helper)
46 : B(B), MF(B.getMF()), MRI(*B.getMRI()),
47 Subtarget(MF.getSubtarget<GCNSubtarget>()),
48 TII(*Subtarget.getInstrInfo()), Helper(Helper) {}
49
50 struct FMinFMaxLegacyInfo {
51 Register LHS;
52 Register RHS;
53 Register True;
54 Register False;
55 CmpInst::Predicate Pred;
56 };
57
58 // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
59 bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
60 void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
61 const FMinFMaxLegacyInfo &Info);
62
63 bool matchUCharToFloat(MachineInstr &MI);
64 void applyUCharToFloat(MachineInstr &MI);
65
66 bool matchRcpSqrtToRsq(MachineInstr &MI,
67 std::function<void(MachineIRBuilder &)> &MatchInfo);
68
69 // FIXME: Should be able to have 2 separate matchdatas rather than custom
70 // struct boilerplate.
71 struct CvtF32UByteMatchInfo {
72 Register CvtVal;
73 unsigned ShiftOffset;
74 };
75
76 bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
77 void applyCvtF32UByteN(MachineInstr &MI,
78 const CvtF32UByteMatchInfo &MatchInfo);
79
80 bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg);
81
82 // Combine unsigned buffer load and signed extension instructions to generate
83 // signed buffer load instructions.
84 bool matchCombineSignExtendInReg(MachineInstr &MI, MachineInstr *&MatchInfo);
85 void applyCombineSignExtendInReg(MachineInstr &MI, MachineInstr *&MatchInfo);
86};
87
88bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
89 MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
90 // FIXME: Type predicate on pattern
91 if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
92 return false;
93
94 Register Cond = MI.getOperand(1).getReg();
95 if (!MRI.hasOneNonDBGUse(Cond) ||
96 !mi_match(Cond, MRI,
97 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
98 return false;
99
100 Info.True = MI.getOperand(2).getReg();
101 Info.False = MI.getOperand(3).getReg();
102
103 // TODO: Handle case where the selected value is an fneg and the compared
104 // constant is the negation of the selected value.
105 if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
106 !(Info.LHS == Info.False && Info.RHS == Info.True))
107 return false;
108
109 switch (Info.Pred) {
110 case CmpInst::FCMP_FALSE:
111 case CmpInst::FCMP_OEQ:
112 case CmpInst::FCMP_ONE:
113 case CmpInst::FCMP_ORD:
114 case CmpInst::FCMP_UNO:
115 case CmpInst::FCMP_UEQ:
116 case CmpInst::FCMP_UNE:
117 case CmpInst::FCMP_TRUE:
118 return false;
119 default:
120 return true;
121 }
122}
123
124void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
125 MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
126 B.setInstrAndDebugLoc(MI);
127 auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
128 B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
129 };
130
131 switch (Info.Pred) {
132 case CmpInst::FCMP_ULT:
133 case CmpInst::FCMP_ULE:
134 if (Info.LHS == Info.True)
135 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
136 else
137 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
138 break;
139 case CmpInst::FCMP_OLE:
140 case CmpInst::FCMP_OLT: {
141 // We need to permute the operands to get the correct NaN behavior. The
142 // selected operand is the second one based on the failing compare with NaN,
143 // so permute it based on the compare type the hardware uses.
144 if (Info.LHS == Info.True)
145 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
146 else
147 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
148 break;
149 }
150 case CmpInst::FCMP_UGE:
151 case CmpInst::FCMP_UGT: {
152 if (Info.LHS == Info.True)
153 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
154 else
155 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
156 break;
157 }
158 case CmpInst::FCMP_OGT:
159 case CmpInst::FCMP_OGE: {
160 if (Info.LHS == Info.True)
161 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
162 else
163 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
164 break;
165 }
166 default:
167 llvm_unreachable("predicate should not have matched");
168 }
169
170 MI.eraseFromParent();
171}
172
173bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
174 Register DstReg = MI.getOperand(0).getReg();
175
176 // TODO: We could try to match extracting the higher bytes, which would be
177 // easier if i8 vectors weren't promoted to i32 vectors, particularly after
178 // types are legalized. v4i8 -> v4f32 is probably the only case to worry
179 // about in practice.
180 LLT Ty = MRI.getType(DstReg);
181 if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
182 Register SrcReg = MI.getOperand(1).getReg();
183 unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
184 assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
185 const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
186 return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
187 }
188
189 return false;
190}
191
192void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
193 B.setInstrAndDebugLoc(MI);
194
195 const LLT S32 = LLT::scalar(32);
196
197 Register DstReg = MI.getOperand(0).getReg();
198 Register SrcReg = MI.getOperand(1).getReg();
199 LLT Ty = MRI.getType(DstReg);
200 LLT SrcTy = MRI.getType(SrcReg);
201 if (SrcTy != S32)
202 SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
203
204 if (Ty == S32) {
205 B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
206 {SrcReg}, MI.getFlags());
207 } else {
208 auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
209 {SrcReg}, MI.getFlags());
210 B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
211 }
212
213 MI.eraseFromParent();
214}
215
216bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
217 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
218
219 auto getRcpSrc = [=](const MachineInstr &MI) {
220 MachineInstr *ResMI = nullptr;
221 if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
222 MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
223 ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());
224
225 return ResMI;
226 };
227
228 auto getSqrtSrc = [=](const MachineInstr &MI) {
229 MachineInstr *SqrtSrcMI = nullptr;
230 auto Match =
231 mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
232 (void)Match;
233 return SqrtSrcMI;
234 };
235
236 MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
237 // rcp(sqrt(x))
238 if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
239 MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
240 B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
241 .addUse(SqrtSrcMI->getOperand(0).getReg())
242 .setMIFlags(MI.getFlags());
243 };
244 return true;
245 }
246
247 // sqrt(rcp(x))
248 if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
249 MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
250 B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
251 .addUse(RcpSrcMI->getOperand(0).getReg())
252 .setMIFlags(MI.getFlags());
253 };
254 return true;
255 }
256
257 return false;
258}
259
260bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
261 MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
262 Register SrcReg = MI.getOperand(1).getReg();
263
264 // Look through G_ZEXT.
265 bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
Value stored to 'IsShr' during its initialization is never read
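The initializer on line 265 exists only for its side effect: m_GZExt(m_Reg(SrcReg)) rewrites SrcReg in place when the source is a G_ZEXT, and the boolean result is unconditionally overwritten on line 269 before any read. A minimal sketch of one possible fix (not an official patch): discard the first match result and declare IsShr at the shift match instead:

  // Look through G_ZEXT. Only the in-place update of SrcReg matters here,
  // so drop the match result instead of storing it into IsShr.
  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));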
266
267 Register Src0;
268 int64_t ShiftAmt;
269 IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
270 if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
271 const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
272
273 unsigned ShiftOffset = 8 * Offset;
274 if (IsShr)
275 ShiftOffset += ShiftAmt;
276 else
277 ShiftOffset -= ShiftAmt;
278
279 MatchInfo.CvtVal = Src0;
280 MatchInfo.ShiftOffset = ShiftOffset;
281 return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
282 }
283
284 // TODO: Simplify demanded bits.
285 return false;
286}
287
288void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
289 MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
290 B.setInstrAndDebugLoc(MI);
291 unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
292
293 const LLT S32 = LLT::scalar(32);
294 Register CvtSrc = MatchInfo.CvtVal;
295 LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
296 if (SrcTy != S32) {
297 assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
298 CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
299 }
300
301 assert(MI.getOpcode() != NewOpc);
302 B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
303 MI.eraseFromParent();
304}
305
306bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
307 MachineInstr &MI, Register &Reg) {
308 const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
309 MF.getSubtarget().getTargetLowering());
310 Reg = MI.getOperand(1).getReg();
311 return TLI->isCanonicalized(Reg, MF);
312}
313
314// The buffer_load_{i8, i16} intrinsics are initially lowered as buffer_load_{u8,
315// u16} instructions. Here, the buffer_load_{u8, u16} instructions are combined
316// with sign extension instructions in order to generate buffer_load_{i8, i16}
317// instructions.
318
319// Identify buffer_load_{u8, u16}.
320bool AMDGPUPostLegalizerCombinerHelper::matchCombineSignExtendInReg(
321 MachineInstr &MI, MachineInstr *&SubwordBufferLoad) {
322 Register Op0Reg = MI.getOperand(1).getReg();
323 SubwordBufferLoad = MRI.getVRegDef(Op0Reg);
324
325 if (!MRI.hasOneNonDBGUse(Op0Reg))
326 return false;
327
328 // Check if the first operand of the sign extension is a subword buffer load
329 // instruction.
330 return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE ||
331 SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
332}
333
334// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
335// buffer_load_{i8, i16}.
336void AMDGPUPostLegalizerCombinerHelper::applyCombineSignExtendInReg(
337 MachineInstr &MI, MachineInstr *&SubwordBufferLoad) {
338 // Modify the opcode and the destination of buffer_load_{u8, u16}:
339 // Replace the opcode.
340 unsigned Opc =
341 SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE
342 ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE
343 : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
344 SubwordBufferLoad->setDesc(TII.get(Opc));
345 // Update the destination register of SubwordBufferLoad with the destination
346 // register of the sign extension.
347 Register SignExtendInsnDst = MI.getOperand(0).getReg();
348 SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst);
349 // Remove the sign extension.
350 MI.eraseFromParent();
351}
352
353class AMDGPUPostLegalizerCombinerHelperState {
354protected:
355 AMDGPUCombinerHelper &Helper;
356 AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
357
358 // Note: pointer is necessary because Target Predicates use
359 // "Subtarget->"
360 const GCNSubtarget *Subtarget;
361
362public:
363 AMDGPUPostLegalizerCombinerHelperState(
364 AMDGPUCombinerHelper &Helper,
365 AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper,
366 const GCNSubtarget &Subtarget)
367 : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper),
368 Subtarget(&Subtarget) {}
369};
370
371#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
372#include "AMDGPUGenPostLegalizeGICombiner.inc"
373#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
374
375namespace {
376#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
377#include "AMDGPUGenPostLegalizeGICombiner.inc"
378#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
379
380class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
381 GISelKnownBits *KB;
382 MachineDominatorTree *MDT;
383 const GCNSubtarget &Subtarget;
384
385public:
386 AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
387
388 AMDGPUPostLegalizerCombinerInfo(const GCNSubtarget &Subtarget, bool EnableOpt,
389 bool OptSize, bool MinSize,
390 const AMDGPULegalizerInfo *LI,
391 GISelKnownBits *KB, MachineDominatorTree *MDT)
392 : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
393 /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
394 KB(KB), MDT(MDT), Subtarget(Subtarget) {
395 if (!GeneratedRuleCfg.parseCommandLineOption())
396 report_fatal_error("Invalid rule identifier");
397 }
398
399 bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
400 MachineIRBuilder &B) const override;
401};
402
403bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
404 MachineInstr &MI,
405 MachineIRBuilder &B) const {
406 AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT,
407 LInfo);
408 AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
409 AMDGPUGenPostLegalizerCombinerHelper Generated(
410 GeneratedRuleCfg, Helper, PostLegalizerHelper, Subtarget);
411
412 if (Generated.tryCombineAll(Observer, MI, B))
413 return true;
414
415 switch (MI.getOpcode()) {
416 case TargetOpcode::G_SHL:
417 case TargetOpcode::G_LSHR:
418 case TargetOpcode::G_ASHR:
419 // On some subtargets, 64-bit shift is a quarter rate instruction. In the
420 // common case, splitting this into a move and a 32-bit shift is faster and
421 // the same code size.
422 return Helper.tryCombineShiftToUnmerge(MI, 32);
423 }
424
425 return false;
426}
427
428#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
429#include "AMDGPUGenPostLegalizeGICombiner.inc"
430#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
431
432// Pass boilerplate
433// ================
434
435class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
436public:
437 static char ID;
438
439 AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
440
441 StringRef getPassName() const override {
442 return "AMDGPUPostLegalizerCombiner";
443 }
444
445 bool runOnMachineFunction(MachineFunction &MF) override;
446
447 void getAnalysisUsage(AnalysisUsage &AU) const override;
448private:
449 bool IsOptNone;
450};
451} // end anonymous namespace
452
453void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
454 AU.addRequired<TargetPassConfig>();
455 AU.setPreservesCFG();
456 getSelectionDAGFallbackAnalysisUsage(AU);
457 AU.addRequired<GISelKnownBitsAnalysis>();
458 AU.addPreserved<GISelKnownBitsAnalysis>();
459 if (!IsOptNone) {
460 AU.addRequired<MachineDominatorTree>();
461 AU.addPreserved<MachineDominatorTree>();
462 }
463 MachineFunctionPass::getAnalysisUsage(AU);
464}
465
466AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
467 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
468 initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
469}
470
471bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
472 if (MF.getProperties().hasProperty(
473 MachineFunctionProperties::Property::FailedISel))
474 return false;
475 auto *TPC = &getAnalysis<TargetPassConfig>();
476 const Function &F = MF.getFunction();
477 bool EnableOpt =
478 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
479
480 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
481 const AMDGPULegalizerInfo *LI
482 = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
483
484 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
485 MachineDominatorTree *MDT =
486 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
487 AMDGPUPostLegalizerCombinerInfo PCInfo(ST, EnableOpt, F.hasOptSize(),
488 F.hasMinSize(), LI, KB, MDT);
489 Combiner C(PCInfo, TPC);
490 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
491}
492
493char AMDGPUPostLegalizerCombiner::ID = 0;
494INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
495 "Combine AMDGPU machine instrs after legalization",
496 false, false)
497INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
498INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
499INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
500 "Combine AMDGPU machine instrs after legalization", false,
501 false)
502
503namespace llvm {
504FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
505 return new AMDGPUPostLegalizerCombiner(IsOptNone);
506}
507} // end namespace llvm