Bug Summary

File: /build/source/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Warning: line 119, column 5
Value stored to 'Ctor' is never read
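
The flagged statement is a classic dead store: 'Ctor' is initialized from SGPRRegisterRegAlloc::getDefault(), reassigned to SGPRRegAlloc inside the if (!Ctor) branch, and then never read again before the function returns (lines 115-122 of the annotated source below). The following standalone sketch models the pattern and one way to silence the warning; the type and helper names in it are hypothetical stand-ins for the LLVM entities, not the upstream fix.

#include <cstdio>

// Standalone model of the dead-store pattern flagged at line 119. All names
// here (FunctionPassCtor, getDefault, setDefault, SGPRRegAllocChoice) are
// hypothetical stand-ins for the LLVM entities in the listing below.
using FunctionPassCtor = void (*)();

static FunctionPassCtor DefaultCtor = nullptr;
static FunctionPassCtor getDefault() { return DefaultCtor; }
static void setDefault(FunctionPassCtor C) { DefaultCtor = C; }

static void chosenAllocator() { std::puts("allocator ctor invoked"); }
static FunctionPassCtor SGPRRegAllocChoice = chosenAllocator;

static void initializeDefaultOnce() {
  FunctionPassCtor Ctor = getDefault();
  if (!Ctor) {
    // Flagged form:  Ctor = SGPRRegAllocChoice;  // value is never read again
    // Removing the dead store (the setDefault() call below is what actually
    // records the choice) silences the analyzer without changing behavior.
    setDefault(SGPRRegAllocChoice);
  }
}

int main() {
  initializeDefaultOnce();
  getDefault()(); // prints "allocator ctor invoked"
  return 0;
}

Note that the sibling function initializeDefaultVGPRRegisterAllocatorOnce (line 128 below) contains the same store pattern.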

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AMDGPUTargetMachine.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AMDGPU -I /build/source/llvm/lib/Target/AMDGPU -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1679443490 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-03-22-005342-16304-1 -x c++ /build/source/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
1//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// The AMDGPU target machine contains all of the hardware specific
11/// information needed to emit code for SI+ GPUs.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUTargetMachine.h"
16#include "AMDGPU.h"
17#include "AMDGPUAliasAnalysis.h"
18#include "AMDGPUCtorDtorLowering.h"
19#include "AMDGPUExportClustering.h"
20#include "AMDGPUIGroupLP.h"
21#include "AMDGPUMacroFusion.h"
22#include "AMDGPURegBankSelect.h"
23#include "AMDGPUTargetObjectFile.h"
24#include "AMDGPUTargetTransformInfo.h"
25#include "GCNIterativeScheduler.h"
26#include "GCNSchedStrategy.h"
27#include "GCNVOPDUtils.h"
28#include "R600.h"
29#include "R600MachineFunctionInfo.h"
30#include "R600TargetMachine.h"
31#include "SIMachineFunctionInfo.h"
32#include "SIMachineScheduler.h"
33#include "TargetInfo/AMDGPUTargetInfo.h"
34#include "Utils/AMDGPUBaseInfo.h"
35#include "llvm/Analysis/CGSCCPassManager.h"
36#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
37#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
38#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
39#include "llvm/CodeGen/GlobalISel/Legalizer.h"
40#include "llvm/CodeGen/GlobalISel/Localizer.h"
41#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
42#include "llvm/CodeGen/MIRParser/MIParser.h"
43#include "llvm/CodeGen/Passes.h"
44#include "llvm/CodeGen/RegAllocRegistry.h"
45#include "llvm/CodeGen/TargetPassConfig.h"
46#include "llvm/IR/IntrinsicsAMDGPU.h"
47#include "llvm/IR/PassManager.h"
48#include "llvm/IR/PatternMatch.h"
49#include "llvm/InitializePasses.h"
50#include "llvm/MC/TargetRegistry.h"
51#include "llvm/Passes/PassBuilder.h"
52#include "llvm/Transforms/IPO.h"
53#include "llvm/Transforms/IPO/AlwaysInliner.h"
54#include "llvm/Transforms/IPO/GlobalDCE.h"
55#include "llvm/Transforms/IPO/Internalize.h"
56#include "llvm/Transforms/Scalar.h"
57#include "llvm/Transforms/Scalar/GVN.h"
58#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
59#include "llvm/Transforms/Utils.h"
60#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
61#include "llvm/Transforms/Vectorize.h"
62#include <optional>
63
64using namespace llvm;
65using namespace llvm::PatternMatch;
66
67namespace {
68class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
69public:
70 SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
71 : RegisterRegAllocBase(N, D, C) {}
72};
73
74class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
75public:
76 VGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
77 : RegisterRegAllocBase(N, D, C) {}
78};
79
80static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
81 const TargetRegisterClass &RC) {
82 return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
83}
84
85static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
86 const TargetRegisterClass &RC) {
87 return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
88}
89
90
91/// -{sgpr|vgpr}-regalloc=... command line option.
92static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
93
94/// A dummy default pass factory indicates whether the register allocator is
95/// overridden on the command line.
96static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
97static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
98
99static SGPRRegisterRegAlloc
100defaultSGPRRegAlloc("default",
101 "pick SGPR register allocator based on -O option",
102 useDefaultRegisterAllocator);
103
104static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,
105 RegisterPassParser<SGPRRegisterRegAlloc>>
106SGPRRegAlloc("sgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
107 cl::desc("Register allocator to use for SGPRs"));
108
109static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
110 RegisterPassParser<VGPRRegisterRegAlloc>>
111VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
112 cl::desc("Register allocator to use for VGPRs"));
113
114
115static void initializeDefaultSGPRRegisterAllocatorOnce() {
116 RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
117
118 if (!Ctor) {
119 Ctor = SGPRRegAlloc;
Value stored to 'Ctor' is never read
120 SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);
121 }
122}
123
124static void initializeDefaultVGPRRegisterAllocatorOnce() {
125 RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
126
127 if (!Ctor) {
128 Ctor = VGPRRegAlloc;
129 VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);
130 }
131}
132
133static FunctionPass *createBasicSGPRRegisterAllocator() {
134 return createBasicRegisterAllocator(onlyAllocateSGPRs);
135}
136
137static FunctionPass *createGreedySGPRRegisterAllocator() {
138 return createGreedyRegisterAllocator(onlyAllocateSGPRs);
139}
140
141static FunctionPass *createFastSGPRRegisterAllocator() {
142 return createFastRegisterAllocator(onlyAllocateSGPRs, false);
143}
144
145static FunctionPass *createBasicVGPRRegisterAllocator() {
146 return createBasicRegisterAllocator(onlyAllocateVGPRs);
147}
148
149static FunctionPass *createGreedyVGPRRegisterAllocator() {
150 return createGreedyRegisterAllocator(onlyAllocateVGPRs);
151}
152
153static FunctionPass *createFastVGPRRegisterAllocator() {
154 return createFastRegisterAllocator(onlyAllocateVGPRs, true);
155}
156
157static SGPRRegisterRegAlloc basicRegAllocSGPR(
158 "basic", "basic register allocator", createBasicSGPRRegisterAllocator);
159static SGPRRegisterRegAlloc greedyRegAllocSGPR(
160 "greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);
161
162static SGPRRegisterRegAlloc fastRegAllocSGPR(
163 "fast", "fast register allocator", createFastSGPRRegisterAllocator);
164
165
166static VGPRRegisterRegAlloc basicRegAllocVGPR(
167 "basic", "basic register allocator", createBasicVGPRRegisterAllocator);
168static VGPRRegisterRegAlloc greedyRegAllocVGPR(
169 "greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);
170
171static VGPRRegisterRegAlloc fastRegAllocVGPR(
172 "fast", "fast register allocator", createFastVGPRRegisterAllocator);
173}
174
175static cl::opt<bool> EnableSROA(
176 "amdgpu-sroa",
177 cl::desc("Run SROA after promote alloca pass"),
178 cl::ReallyHidden,
179 cl::init(true));
180
181static cl::opt<bool>
182EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
183 cl::desc("Run early if-conversion"),
184 cl::init(false));
185
186static cl::opt<bool>
187OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
188 cl::desc("Run pre-RA exec mask optimizations"),
189 cl::init(true));
190
191// Option to disable vectorizer for tests.
192static cl::opt<bool> EnableLoadStoreVectorizer(
193 "amdgpu-load-store-vectorizer",
194 cl::desc("Enable load store vectorizer"),
195 cl::init(true),
196 cl::Hidden);
197
198// Option to control global loads scalarization
199static cl::opt<bool> ScalarizeGlobal(
200 "amdgpu-scalarize-global-loads",
201 cl::desc("Enable global load scalarization"),
202 cl::init(true),
203 cl::Hidden);
204
205// Option to run internalize pass.
206static cl::opt<bool> InternalizeSymbols(
207 "amdgpu-internalize-symbols",
208 cl::desc("Enable elimination of non-kernel functions and unused globals"),
209 cl::init(false),
210 cl::Hidden);
211
212// Option to inline all early.
213static cl::opt<bool> EarlyInlineAll(
214 "amdgpu-early-inline-all",
215 cl::desc("Inline all functions early"),
216 cl::init(false),
217 cl::Hidden);
218
219static cl::opt<bool> RemoveIncompatibleFunctions(
220 "amdgpu-enable-remove-incompatible-functions", cl::Hidden,
221 cl::desc("Enable removal of functions when they"
222 "use features not supported by the target GPU"),
223 cl::init(true));
224
225static cl::opt<bool> EnableSDWAPeephole(
226 "amdgpu-sdwa-peephole",
227 cl::desc("Enable SDWA peepholer"),
228 cl::init(true));
229
230static cl::opt<bool> EnableDPPCombine(
231 "amdgpu-dpp-combine",
232 cl::desc("Enable DPP combiner"),
233 cl::init(true));
234
235// Enable address space based alias analysis
236static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
237 cl::desc("Enable AMDGPU Alias Analysis"),
238 cl::init(true));
239
240// Option to run late CFG structurizer
241static cl::opt<bool, true> LateCFGStructurize(
242 "amdgpu-late-structurize",
243 cl::desc("Enable late CFG structurization"),
244 cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
245 cl::Hidden);
246
247// Enable lib calls simplifications
248static cl::opt<bool> EnableLibCallSimplify(
249 "amdgpu-simplify-libcall",
250 cl::desc("Enable amdgpu library simplifications"),
251 cl::init(true),
252 cl::Hidden);
253
254static cl::opt<bool> EnableLowerKernelArguments(
255 "amdgpu-ir-lower-kernel-arguments",
256 cl::desc("Lower kernel argument loads in IR pass"),
257 cl::init(true),
258 cl::Hidden);
259
260static cl::opt<bool> EnableRegReassign(
261 "amdgpu-reassign-regs",
262 cl::desc("Enable register reassign optimizations on gfx10+"),
263 cl::init(true),
264 cl::Hidden);
265
266static cl::opt<bool> OptVGPRLiveRange(
267 "amdgpu-opt-vgpr-liverange",
268 cl::desc("Enable VGPR liverange optimizations for if-else structure"),
269 cl::init(true), cl::Hidden);
270
271// Enable atomic optimization
272static cl::opt<bool> EnableAtomicOptimizations(
273 "amdgpu-atomic-optimizations",
274 cl::desc("Enable atomic optimizations"),
275 cl::init(false),
276 cl::Hidden);
277
278// Enable Mode register optimization
279static cl::opt<bool> EnableSIModeRegisterPass(
280 "amdgpu-mode-register",
281 cl::desc("Enable mode register pass"),
282 cl::init(true),
283 cl::Hidden);
284
285// Enable GFX11+ s_delay_alu insertion
286static cl::opt<bool>
287 EnableInsertDelayAlu("amdgpu-enable-delay-alu",
288 cl::desc("Enable s_delay_alu insertion"),
289 cl::init(true), cl::Hidden);
290
291// Enable GFX11+ VOPD
292static cl::opt<bool>
293 EnableVOPD("amdgpu-enable-vopd",
294 cl::desc("Enable VOPD, dual issue of VALU in wave32"),
295 cl::init(true), cl::Hidden);
296
297 // This option is used in lit tests to prevent dead-coding of the patterns being inspected.
298static cl::opt<bool>
299EnableDCEInRA("amdgpu-dce-in-ra",
300 cl::init(true), cl::Hidden,
301 cl::desc("Enable machine DCE inside regalloc"));
302
303static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
304 cl::desc("Adjust wave priority"),
305 cl::init(false), cl::Hidden);
306
307static cl::opt<bool> EnableScalarIRPasses(
308 "amdgpu-scalar-ir-passes",
309 cl::desc("Enable scalar IR passes"),
310 cl::init(true),
311 cl::Hidden);
312
313static cl::opt<bool> EnableStructurizerWorkarounds(
314 "amdgpu-enable-structurizer-workarounds",
315 cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
316 cl::Hidden);
317
318static cl::opt<bool> EnableLDSReplaceWithPointer(
319 "amdgpu-enable-lds-replace-with-pointer",
320 cl::desc("Enable LDS replace with pointer pass"), cl::init(false),
321 cl::Hidden);
322
323static cl::opt<bool, true> EnableLowerModuleLDS(
324 "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
325 cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
326 cl::Hidden);
327
328static cl::opt<bool> EnablePreRAOptimizations(
329 "amdgpu-enable-pre-ra-optimizations",
330 cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
331 cl::Hidden);
332
333static cl::opt<bool> EnablePromoteKernelArguments(
334 "amdgpu-enable-promote-kernel-arguments",
335 cl::desc("Enable promotion of flat kernel pointer arguments to global"),
336 cl::Hidden, cl::init(true));
337
338static cl::opt<bool> EnableMaxIlpSchedStrategy(
339 "amdgpu-enable-max-ilp-scheduling-strategy",
340 cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
341 cl::Hidden, cl::init(false));
342
343extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
344 // Register the target
345 RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
346 RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
347
348 PassRegistry *PR = PassRegistry::getPassRegistry();
349 initializeR600ClauseMergePassPass(*PR);
350 initializeR600ControlFlowFinalizerPass(*PR);
351 initializeR600PacketizerPass(*PR);
352 initializeR600ExpandSpecialInstrsPassPass(*PR);
353 initializeR600VectorRegMergerPass(*PR);
354 initializeGlobalISel(*PR);
355 initializeAMDGPUDAGToDAGISelPass(*PR);
356 initializeGCNDPPCombinePass(*PR);
357 initializeSILowerI1CopiesPass(*PR);
358 initializeSILowerSGPRSpillsPass(*PR);
359 initializeSIFixSGPRCopiesPass(*PR);
360 initializeSIFixVGPRCopiesPass(*PR);
361 initializeSIFoldOperandsPass(*PR);
362 initializeSIPeepholeSDWAPass(*PR);
363 initializeSIShrinkInstructionsPass(*PR);
364 initializeSIOptimizeExecMaskingPreRAPass(*PR);
365 initializeSIOptimizeVGPRLiveRangePass(*PR);
366 initializeSILoadStoreOptimizerPass(*PR);
367 initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
368 initializeAMDGPUAlwaysInlinePass(*PR);
369 initializeAMDGPUAttributorPass(*PR);
370 initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
371 initializeAMDGPUAnnotateUniformValuesPass(*PR);
372 initializeAMDGPUArgumentUsageInfoPass(*PR);
373 initializeAMDGPUAtomicOptimizerPass(*PR);
374 initializeAMDGPULowerKernelArgumentsPass(*PR);
375 initializeAMDGPUPromoteKernelArgumentsPass(*PR);
376 initializeAMDGPULowerKernelAttributesPass(*PR);
377 initializeAMDGPULowerIntrinsicsPass(*PR);
378 initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
379 initializeAMDGPUPostLegalizerCombinerPass(*PR);
380 initializeAMDGPUPreLegalizerCombinerPass(*PR);
381 initializeAMDGPURegBankCombinerPass(*PR);
382 initializeAMDGPURegBankSelectPass(*PR);
383 initializeAMDGPUPromoteAllocaPass(*PR);
384 initializeAMDGPUPromoteAllocaToVectorPass(*PR);
385 initializeAMDGPUCodeGenPreparePass(*PR);
386 initializeAMDGPULateCodeGenPreparePass(*PR);
387 initializeAMDGPUPropagateAttributesEarlyPass(*PR);
388 initializeAMDGPUPropagateAttributesLatePass(*PR);
389 initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
390 initializeAMDGPUReplaceLDSUseWithPointerPass(*PR);
391 initializeAMDGPULowerModuleLDSPass(*PR);
392 initializeAMDGPURewriteOutArgumentsPass(*PR);
393 initializeAMDGPURewriteUndefForPHIPass(*PR);
394 initializeAMDGPUUnifyMetadataPass(*PR);
395 initializeSIAnnotateControlFlowPass(*PR);
396 initializeAMDGPUReleaseVGPRsPass(*PR);
397 initializeAMDGPUInsertDelayAluPass(*PR);
398 initializeSIInsertHardClausesPass(*PR);
399 initializeSIInsertWaitcntsPass(*PR);
400 initializeSIModeRegisterPass(*PR);
401 initializeSIWholeQuadModePass(*PR);
402 initializeSILowerControlFlowPass(*PR);
403 initializeSIPreEmitPeepholePass(*PR);
404 initializeSILateBranchLoweringPass(*PR);
405 initializeSIMemoryLegalizerPass(*PR);
406 initializeSIOptimizeExecMaskingPass(*PR);
407 initializeSIPreAllocateWWMRegsPass(*PR);
408 initializeSIFormMemoryClausesPass(*PR);
409 initializeSIPostRABundlerPass(*PR);
410 initializeGCNCreateVOPDPass(*PR);
411 initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
412 initializeAMDGPUAAWrapperPassPass(*PR);
413 initializeAMDGPUExternalAAWrapperPass(*PR);
414 initializeAMDGPUUseNativeCallsPass(*PR);
415 initializeAMDGPUSimplifyLibCallsPass(*PR);
416 initializeAMDGPUPrintfRuntimeBindingPass(*PR);
417 initializeAMDGPUResourceUsageAnalysisPass(*PR);
418 initializeGCNNSAReassignPass(*PR);
419 initializeGCNPreRAOptimizationsPass(*PR);
420}
421
422static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
423 return std::make_unique<AMDGPUTargetObjectFile>();
424}
425
426static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
427 return new SIScheduleDAGMI(C);
428}
429
430static ScheduleDAGInstrs *
431createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
432 const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
433 ScheduleDAGMILive *DAG =
434 new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
435 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
436 if (ST.shouldClusterStores())
437 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
438 DAG->addMutation(createIGroupLPDAGMutation());
439 DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
440 DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
441 return DAG;
442}
443
444static ScheduleDAGInstrs *
445createGCNMaxILPMachineScheduler(MachineSchedContext *C) {
446 ScheduleDAGMILive *DAG =
447 new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxILPSchedStrategy>(C));
448 DAG->addMutation(createIGroupLPDAGMutation());
449 return DAG;
450}
451
452static ScheduleDAGInstrs *
453createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
454 const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
455 auto DAG = new GCNIterativeScheduler(C,
456 GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
457 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
458 if (ST.shouldClusterStores())
459 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
460 return DAG;
461}
462
463static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
464 return new GCNIterativeScheduler(C,
465 GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
466}
467
468static ScheduleDAGInstrs *
469createIterativeILPMachineScheduler(MachineSchedContext *C) {
470 const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
471 auto DAG = new GCNIterativeScheduler(C,
472 GCNIterativeScheduler::SCHEDULE_ILP);
473 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
474 if (ST.shouldClusterStores())
475 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
476 DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
477 return DAG;
478}
479
480static MachineSchedRegistry
481SISchedRegistry("si", "Run SI's custom scheduler",
482 createSIMachineScheduler);
483
484static MachineSchedRegistry
485GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
486 "Run GCN scheduler to maximize occupancy",
487 createGCNMaxOccupancyMachineScheduler);
488
489static MachineSchedRegistry
490 GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp",
491 createGCNMaxILPMachineScheduler);
492
493static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry(
494 "gcn-iterative-max-occupancy-experimental",
495 "Run GCN scheduler to maximize occupancy (experimental)",
496 createIterativeGCNMaxOccupancyMachineScheduler);
497
498static MachineSchedRegistry GCNMinRegSchedRegistry(
499 "gcn-iterative-minreg",
500 "Run GCN iterative scheduler for minimal register usage (experimental)",
501 createMinRegScheduler);
502
503static MachineSchedRegistry GCNILPSchedRegistry(
504 "gcn-iterative-ilp",
505 "Run GCN iterative scheduler for ILP scheduling (experimental)",
506 createIterativeILPMachineScheduler);
507
508static StringRef computeDataLayout(const Triple &TT) {
509 if (TT.getArch() == Triple::r600) {
510 // 32-bit pointers.
511 return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
512 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
513 }
514
515 // 32-bit private, local, and region pointers. 64-bit global, constant and
516 // flat, non-integral buffer fat pointers.
517 return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
518 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
519 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
520 "-ni:7";
521}
522
523LLVM_READNONE
524static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
525 if (!GPU.empty())
526 return GPU;
527
528 // Need to default to a target with flat support for HSA.
529 if (TT.getArch() == Triple::amdgcn)
530 return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
531
532 return "r600";
533}
534
535static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
536 // The AMDGPU toolchain only supports generating shared objects, so we
537 // must always use PIC.
538 return Reloc::PIC_;
539}
540
541AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
542 StringRef CPU, StringRef FS,
543 TargetOptions Options,
544 std::optional<Reloc::Model> RM,
545 std::optional<CodeModel::Model> CM,
546 CodeGenOpt::Level OptLevel)
547 : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
548 FS, Options, getEffectiveRelocModel(RM),
549 getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
550 TLOF(createTLOF(getTargetTriple())) {
551 initAsmInfo();
552 if (TT.getArch() == Triple::amdgcn) {
553 if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize64"))
554 MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave64));
555 else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
556 MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave32));
557 }
558}
559
560bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
561bool AMDGPUTargetMachine::EnableFunctionCalls = false;
562bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
563
564AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
565
566StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
567 Attribute GPUAttr = F.getFnAttribute("target-cpu");
568 return GPUAttr.isValid() ? GPUAttr.getValueAsString() : getTargetCPU();
569}
570
571StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
572 Attribute FSAttr = F.getFnAttribute("target-features");
573
574 return FSAttr.isValid() ? FSAttr.getValueAsString()
575 : getTargetFeatureString();
576}
577
578/// Predicate for Internalize pass.
579static bool mustPreserveGV(const GlobalValue &GV) {
580 if (const Function *F = dyn_cast<Function>(&GV))
581 return F->isDeclaration() || F->getName().startswith("__asan_") ||
582 F->getName().startswith("__sanitizer_") ||
583 AMDGPU::isEntryFunctionCC(F->getCallingConv());
584
585 GV.removeDeadConstantUsers();
586 return !GV.use_empty();
587}
588
589void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
590 AAM.registerFunctionAnalysis<AMDGPUAA>();
591}
592
593void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
594 PB.registerPipelineParsingCallback(
595 [this](StringRef PassName, ModulePassManager &PM,
596 ArrayRef<PassBuilder::PipelineElement>) {
597 if (PassName == "amdgpu-propagate-attributes-late") {
598 PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
599 return true;
600 }
601 if (PassName == "amdgpu-unify-metadata") {
602 PM.addPass(AMDGPUUnifyMetadataPass());
603 return true;
604 }
605 if (PassName == "amdgpu-printf-runtime-binding") {
606 PM.addPass(AMDGPUPrintfRuntimeBindingPass());
607 return true;
608 }
609 if (PassName == "amdgpu-always-inline") {
610 PM.addPass(AMDGPUAlwaysInlinePass());
611 return true;
612 }
613 if (PassName == "amdgpu-replace-lds-use-with-pointer") {
614 PM.addPass(AMDGPUReplaceLDSUseWithPointerPass());
615 return true;
616 }
617 if (PassName == "amdgpu-lower-module-lds") {
618 PM.addPass(AMDGPULowerModuleLDSPass());
619 return true;
620 }
621 if (PassName == "amdgpu-lower-ctor-dtor") {
622 PM.addPass(AMDGPUCtorDtorLoweringPass());
623 return true;
624 }
625 return false;
626 });
627 PB.registerPipelineParsingCallback(
628 [this](StringRef PassName, FunctionPassManager &PM,
629 ArrayRef<PassBuilder::PipelineElement>) {
630 if (PassName == "amdgpu-simplifylib") {
631 PM.addPass(AMDGPUSimplifyLibCallsPass(*this));
632 return true;
633 }
634 if (PassName == "amdgpu-usenative") {
635 PM.addPass(AMDGPUUseNativeCallsPass());
636 return true;
637 }
638 if (PassName == "amdgpu-promote-alloca") {
639 PM.addPass(AMDGPUPromoteAllocaPass(*this));
640 return true;
641 }
642 if (PassName == "amdgpu-promote-alloca-to-vector") {
643 PM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
644 return true;
645 }
646 if (PassName == "amdgpu-lower-kernel-attributes") {
647 PM.addPass(AMDGPULowerKernelAttributesPass());
648 return true;
649 }
650 if (PassName == "amdgpu-propagate-attributes-early") {
651 PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
652 return true;
653 }
654 if (PassName == "amdgpu-promote-kernel-arguments") {
655 PM.addPass(AMDGPUPromoteKernelArgumentsPass());
656 return true;
657 }
658 return false;
659 });
660
661 PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &FAM) {
662 FAM.registerPass([&] { return AMDGPUAA(); });
663 });
664
665 PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
666 if (AAName == "amdgpu-aa") {
667 AAM.registerFunctionAnalysis<AMDGPUAA>();
668 return true;
669 }
670 return false;
671 });
672
673 PB.registerPipelineStartEPCallback(
674 [this](ModulePassManager &PM, OptimizationLevel Level) {
675 FunctionPassManager FPM;
676 FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
677 FPM.addPass(AMDGPUUseNativeCallsPass());
678 if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
679 FPM.addPass(AMDGPUSimplifyLibCallsPass(*this));
680 PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
681 });
682
683 PB.registerPipelineEarlySimplificationEPCallback(
684 [this](ModulePassManager &PM, OptimizationLevel Level) {
685 if (Level == OptimizationLevel::O0)
686 return;
687
688 PM.addPass(AMDGPUUnifyMetadataPass());
689 PM.addPass(AMDGPUPrintfRuntimeBindingPass());
690
691 if (InternalizeSymbols) {
692 PM.addPass(InternalizePass(mustPreserveGV));
693 }
694 PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
695 if (InternalizeSymbols) {
696 PM.addPass(GlobalDCEPass());
697 }
698 if (EarlyInlineAll && !EnableFunctionCalls)
699 PM.addPass(AMDGPUAlwaysInlinePass());
700 });
701
702 PB.registerCGSCCOptimizerLateEPCallback(
703 [this](CGSCCPassManager &PM, OptimizationLevel Level) {
704 if (Level == OptimizationLevel::O0)
705 return;
706
707 FunctionPassManager FPM;
708
709 // Add promote kernel arguments pass to the opt pipeline right before
710 // infer address spaces which is needed to do actual address space
711 // rewriting.
712 if (Level.getSpeedupLevel() > OptimizationLevel::O1.getSpeedupLevel() &&
713 EnablePromoteKernelArguments)
714 FPM.addPass(AMDGPUPromoteKernelArgumentsPass());
715
716 // Add infer address spaces pass to the opt pipeline after inlining
717 // but before SROA to increase SROA opportunities.
718 FPM.addPass(InferAddressSpacesPass());
719
720 // This should run after inlining to have any chance of doing
721 // anything, and before other cleanup optimizations.
722 FPM.addPass(AMDGPULowerKernelAttributesPass());
723
724 if (Level != OptimizationLevel::O0) {
725 // Promote alloca to vector before SROA and loop unroll. If we
726 // manage to eliminate allocas before unroll we may choose to unroll
727 // less.
728 FPM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
729 }
730
731 PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
732 });
733}
734
735int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
736 return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
737 AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
738 AddrSpace == AMDGPUAS::REGION_ADDRESS)
739 ? -1
740 : 0;
741}
742
743bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
744 unsigned DestAS) const {
745 return AMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
746 AMDGPU::isFlatGlobalAddrSpace(DestAS);
747}
748
749unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
750 const auto *LD = dyn_cast<LoadInst>(V);
751 if (!LD)
752 return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
753
754 // The loaded value must be a generic pointer.
755 assert(V->getType()->isPointerTy() &&
756 V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);
757
758 const auto *Ptr = LD->getPointerOperand();
759 if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
760 return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
761 // For a generic pointer loaded from the constant memory, it could be assumed
762 // as a global pointer since the constant memory is only populated on the
763 // host side. As implied by the offload programming model, only global
764 // pointers could be referenced on the host side.
765 return AMDGPUAS::GLOBAL_ADDRESS;
766}
767
768std::pair<const Value *, unsigned>
769AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const {
770 if (auto *II = dyn_cast<IntrinsicInst>(V)) {
771 switch (II->getIntrinsicID()) {
772 case Intrinsic::amdgcn_is_shared:
773 return std::pair(II->getArgOperand(0), AMDGPUAS::LOCAL_ADDRESS);
774 case Intrinsic::amdgcn_is_private:
775 return std::pair(II->getArgOperand(0), AMDGPUAS::PRIVATE_ADDRESS);
776 default:
777 break;
778 }
779 return std::pair(nullptr, -1);
780 }
781 // Check the global pointer predication based on
782 // (!is_shared(p) && !is_private(p)). Note that logical 'and' is commutative and
783 // the order of 'is_shared' and 'is_private' is not significant.
784 Value *Ptr;
785 if (match(
786 const_cast<Value *>(V),
787 m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
788 m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
789 m_Deferred(Ptr))))))
790 return std::pair(Ptr, AMDGPUAS::GLOBAL_ADDRESS);
791
792 return std::pair(nullptr, -1);
793}
794
795unsigned
796AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
797 switch (Kind) {
798 case PseudoSourceValue::Stack:
799 case PseudoSourceValue::FixedStack:
800 return AMDGPUAS::PRIVATE_ADDRESS;
801 case PseudoSourceValue::ConstantPool:
802 case PseudoSourceValue::GOT:
803 case PseudoSourceValue::JumpTable:
804 case PseudoSourceValue::GlobalValueCallEntry:
805 case PseudoSourceValue::ExternalSymbolCallEntry:
806 return AMDGPUAS::CONSTANT_ADDRESS;
807 }
808 return AMDGPUAS::FLAT_ADDRESS;
809}
810
811//===----------------------------------------------------------------------===//
812// GCN Target Machine (SI+)
813//===----------------------------------------------------------------------===//
814
815GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
816 StringRef CPU, StringRef FS,
817 TargetOptions Options,
818 std::optional<Reloc::Model> RM,
819 std::optional<CodeModel::Model> CM,
820 CodeGenOpt::Level OL, bool JIT)
821 : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
822
823const TargetSubtargetInfo *
824GCNTargetMachine::getSubtargetImpl(const Function &F) const {
825 StringRef GPU = getGPUName(F);
826 StringRef FS = getFeatureString(F);
827
828 SmallString<128> SubtargetKey(GPU);
829 SubtargetKey.append(FS);
830
831 auto &I = SubtargetMap[SubtargetKey];
832 if (!I) {
833 // This needs to be done before we create a new subtarget since any
834 // creation will depend on the TM and the code generation flags on the
835 // function that reside in TargetOptions.
836 resetTargetOptions(F);
837 I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
838 }
839
840 I->setScalarizeGlobalBehavior(ScalarizeGlobal);
841
842 return I.get();
843}
844
845TargetTransformInfo
846GCNTargetMachine::getTargetTransformInfo(const Function &F) const {
847 return TargetTransformInfo(GCNTTIImpl(this, F));
848}
849
850//===----------------------------------------------------------------------===//
851// AMDGPU Pass Setup
852//===----------------------------------------------------------------------===//
853
854std::unique_ptr<CSEConfigBase> llvm::AMDGPUPassConfig::getCSEConfig() const {
855 return getStandardCSEConfigForOpt(TM->getOptLevel());
856}
857
858namespace {
859
860class GCNPassConfig final : public AMDGPUPassConfig {
861public:
862 GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
863 : AMDGPUPassConfig(TM, PM) {
864 // It is necessary to know the register usage of the entire call graph. We
865 // allow calls without EnableAMDGPUFunctionCalls if they are marked
866 // noinline, so this is always required.
867 setRequiresCodeGenSCCOrder(true);
868 substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
869 }
870
871 GCNTargetMachine &getGCNTargetMachine() const {
872 return getTM<GCNTargetMachine>();
873 }
874
875 ScheduleDAGInstrs *
876 createMachineScheduler(MachineSchedContext *C) const override;
877
878 ScheduleDAGInstrs *
879 createPostMachineScheduler(MachineSchedContext *C) const override {
880 ScheduleDAGMI *DAG = new GCNPostScheduleDAGMILive(
881 C, std::make_unique<PostGenericScheduler>(C),
882 /*RemoveKillFlags=*/true);
883 const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
884 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
885 if (ST.shouldClusterStores())
886 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
887 DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
888 DAG->addMutation(createIGroupLPDAGMutation());
889 if (isPassEnabled(EnableVOPD, CodeGenOpt::Less))
890 DAG->addMutation(createVOPDPairingMutation());
891 return DAG;
892 }
893
894 bool addPreISel() override;
895 void addMachineSSAOptimization() override;
896 bool addILPOpts() override;
897 bool addInstSelector() override;
898 bool addIRTranslator() override;
899 void addPreLegalizeMachineIR() override;
900 bool addLegalizeMachineIR() override;
901 void addPreRegBankSelect() override;
902 bool addRegBankSelect() override;
903 void addPreGlobalInstructionSelect() override;
904 bool addGlobalInstructionSelect() override;
905 void addFastRegAlloc() override;
906 void addOptimizedRegAlloc() override;
907
908 FunctionPass *createSGPRAllocPass(bool Optimized);
909 FunctionPass *createVGPRAllocPass(bool Optimized);
910 FunctionPass *createRegAllocPass(bool Optimized) override;
911
912 bool addRegAssignAndRewriteFast() override;
913 bool addRegAssignAndRewriteOptimized() override;
914
915 void addPreRegAlloc() override;
916 bool addPreRewrite() override;
917 void addPostRegAlloc() override;
918 void addPreSched2() override;
919 void addPreEmitPass() override;
920};
921
922} // end anonymous namespace
923
924AMDGPUPassConfig::AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
925 : TargetPassConfig(TM, PM) {
926 // Exceptions and StackMaps are not supported, so these passes will never do
927 // anything.
928 disablePass(&StackMapLivenessID);
929 disablePass(&FuncletLayoutID);
930 // Garbage collection is not supported.
931 disablePass(&GCLoweringID);
932 disablePass(&ShadowStackGCLoweringID);
933}
934
935void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
936 if (getOptLevel() == CodeGenOpt::Aggressive)
937 addPass(createGVNPass());
938 else
939 addPass(createEarlyCSEPass());
940}
941
942void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
943 addPass(createLICMPass());
944 addPass(createSeparateConstOffsetFromGEPPass());
945 // ReassociateGEPs exposes more opportunities for SLSR. See
946 // the example in reassociate-geps-and-slsr.ll.
947 addPass(createStraightLineStrengthReducePass());
948 // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
949 // EarlyCSE can reuse.
950 addEarlyCSEOrGVNPass();
951 // Run NaryReassociate after EarlyCSE/GVN to be more effective.
952 addPass(createNaryReassociatePass());
953 // NaryReassociate on GEPs creates redundant common expressions, so run
954 // EarlyCSE after it.
955 addPass(createEarlyCSEPass());
956}
957
958void AMDGPUPassConfig::addIRPasses() {
959 const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
960
961 // There is no reason to run these.
962 disablePass(&StackMapLivenessID);
963 disablePass(&FuncletLayoutID);
964 disablePass(&PatchableFunctionID);
965
966 addPass(createAMDGPUPrintfRuntimeBinding());
967 addPass(createAMDGPUCtorDtorLoweringLegacyPass());
968
969 // Run the propagate attributes pass in the backend in case opt was not run.
970 addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
971
972 addPass(createAMDGPULowerIntrinsicsPass());
973
974 // Function calls are not supported, so make sure we inline everything.
975 addPass(createAMDGPUAlwaysInlinePass());
976 addPass(createAlwaysInlinerLegacyPass());
977
978 // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
979 if (TM.getTargetTriple().getArch() == Triple::r600)
980 addPass(createR600OpenCLImageTypeLoweringPass());
981
982 // Replace OpenCL enqueued block function pointers with global variables.
983 addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());
984
985 // This can increase the LDS used by the kernel, so it runs before PromoteAlloca.
986 if (EnableLowerModuleLDS) {
987 // The pass "amdgpu-replace-lds-use-with-pointer" needs to run before the
988 // pass "amdgpu-lower-module-lds", and it should only run when the
989 // "amdgpu-lower-module-lds" pass is enabled.
990 if (EnableLDSReplaceWithPointer)
991 addPass(createAMDGPUReplaceLDSUseWithPointerPass());
992
993 addPass(createAMDGPULowerModuleLDSPass());
994 }
995
996 if (TM.getOptLevel() > CodeGenOpt::None)
997 addPass(createInferAddressSpacesPass());
998
999 addPass(createAtomicExpandPass());
1000
1001 if (TM.getOptLevel() > CodeGenOpt::None) {
1002 addPass(createAMDGPUPromoteAlloca());
1003
1004 if (EnableSROA)
1005 addPass(createSROAPass());
1006 if (isPassEnabled(EnableScalarIRPasses))
1007 addStraightLineScalarOptimizationPasses();
1008
1009 if (EnableAMDGPUAliasAnalysis) {
1010 addPass(createAMDGPUAAWrapperPass());
1011 addPass(createExternalAAWrapperPass([](Pass &P, Function &,
1012 AAResults &AAR) {
1013 if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
1014 AAR.addAAResult(WrapperPass->getResult());
1015 }));
1016 }
1017
1018 if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
1019 // TODO: May want to move later or split into an early and late one.
1020 addPass(createAMDGPUCodeGenPreparePass());
1021 }
1022 }
1023
1024 TargetPassConfig::addIRPasses();
1025
1026 // EarlyCSE is not always strong enough to clean up what LSR produces. For
1027 // example, GVN can combine
1028 //
1029 // %0 = add %a, %b
1030 // %1 = add %b, %a
1031 //
1032 // and
1033 //
1034 // %0 = shl nsw %a, 2
1035 // %1 = shl %a, 2
1036 //
1037 // but EarlyCSE can do neither of them.
1038 if (isPassEnabled(EnableScalarIRPasses))
1039 addEarlyCSEOrGVNPass();
1040}
1041
1042void AMDGPUPassConfig::addCodeGenPrepare() {
1043 if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
1044 if (RemoveIncompatibleFunctions)
1045 addPass(createAMDGPURemoveIncompatibleFunctionsPass(TM));
1046
1047 addPass(createAMDGPUAttributorPass());
1048
1049 // FIXME: This pass adds 2 hacky attributes that can be replaced with an
1050 // analysis, and should be removed.
1051 addPass(createAMDGPUAnnotateKernelFeaturesPass());
1052 }
1053
1054 if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
1055 EnableLowerKernelArguments)
1056 addPass(createAMDGPULowerKernelArgumentsPass());
1057
1058 TargetPassConfig::addCodeGenPrepare();
1059
1060 if (isPassEnabled(EnableLoadStoreVectorizer))
1061 addPass(createLoadStoreVectorizerPass());
1062
1063 // LowerSwitch pass may introduce unreachable blocks that can
1064 // cause unexpected behavior for subsequent passes. Placing it
1065 // here seems better, as these blocks then get cleaned up by
1066 // UnreachableBlockElim, which is inserted next in the pass flow.
1067 addPass(createLowerSwitchPass());
1068}
1069
1070bool AMDGPUPassConfig::addPreISel() {
1071 if (TM->getOptLevel() > CodeGenOpt::None)
1072 addPass(createFlattenCFGPass());
1073 return false;
1074}
1075
1076bool AMDGPUPassConfig::addInstSelector() {
1077 addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
1078 return false;
1079}
1080
1081bool AMDGPUPassConfig::addGCPasses() {
1082 // Do nothing. GC is not supported.
1083 return false;
1084}
1085
1086llvm::ScheduleDAGInstrs *
1087AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const {
1088 const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1089 ScheduleDAGMILive *DAG = createGenericSchedLive(C);
1090 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
1091 if (ST.shouldClusterStores())
1092 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
1093 return DAG;
1094}
1095
1096MachineFunctionInfo *R600TargetMachine::createMachineFunctionInfo(
1097 BumpPtrAllocator &Allocator, const Function &F,
1098 const TargetSubtargetInfo *STI) const {
1099 return R600MachineFunctionInfo::create<R600MachineFunctionInfo>(
1100 Allocator, F, static_cast<const R600Subtarget *>(STI));
1101}
1102
1103//===----------------------------------------------------------------------===//
1104// GCN Pass Setup
1105//===----------------------------------------------------------------------===//
1106
1107ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
1108 MachineSchedContext *C) const {
1109 const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1110 if (ST.enableSIScheduler())
1111 return createSIMachineScheduler(C);
1112
1113 if (EnableMaxIlpSchedStrategy)
1114 return createGCNMaxILPMachineScheduler(C);
1115
1116 return createGCNMaxOccupancyMachineScheduler(C);
1117}
1118
1119bool GCNPassConfig::addPreISel() {
1120 AMDGPUPassConfig::addPreISel();
1121
1122 if (TM->getOptLevel() > CodeGenOpt::None)
1123 addPass(createAMDGPULateCodeGenPreparePass());
1124
1125 if (isPassEnabled(EnableAtomicOptimizations, CodeGenOpt::Less)) {
1126 addPass(createAMDGPUAtomicOptimizerPass());
1127 }
1128
1129 if (TM->getOptLevel() > CodeGenOpt::None)
1130 addPass(createSinkingPass());
1131
1132 // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
1133 // regions formed by them.
1134 addPass(&AMDGPUUnifyDivergentExitNodesID);
1135 if (!LateCFGStructurize) {
1136 if (EnableStructurizerWorkarounds) {
1137 addPass(createFixIrreduciblePass());
1138 addPass(createUnifyLoopExitsPass());
1139 }
1140 addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions
1141 }
1142 addPass(createAMDGPUAnnotateUniformValues());
1143 if (!LateCFGStructurize) {
1144 addPass(createSIAnnotateControlFlowPass());
1145 // TODO: Move this right after structurizeCFG to avoid extra divergence
1146 // analysis. This depends on stopping SIAnnotateControlFlow from making
1147 // control flow modifications.
1148 addPass(createAMDGPURewriteUndefForPHIPass());
1149 }
1150 addPass(createLCSSAPass());
1151
1152 if (TM->getOptLevel() > CodeGenOpt::Less)
1153 addPass(&AMDGPUPerfHintAnalysisID);
1154
1155 return false;
1156}
1157
1158void GCNPassConfig::addMachineSSAOptimization() {
1159 TargetPassConfig::addMachineSSAOptimization();
1160
1161 // We want to fold operands after PeepholeOptimizer has run (or as part of
1162 // it), because it will eliminate extra copies making it easier to fold the
1163 // real source operand. We want to eliminate dead instructions after, so that
1164 // we see fewer uses of the copies. We then need to clean up the dead
1165 // instructions leftover after the operands are folded as well.
1166 //
1167 // XXX - Can we get away without running DeadMachineInstructionElim again?
1168 addPass(&SIFoldOperandsID);
1169 if (EnableDPPCombine)
1170 addPass(&GCNDPPCombineID);
1171 addPass(&SILoadStoreOptimizerID);
1172 if (isPassEnabled(EnableSDWAPeephole)) {
1173 addPass(&SIPeepholeSDWAID);
1174 addPass(&EarlyMachineLICMID);
1175 addPass(&MachineCSEID);
1176 addPass(&SIFoldOperandsID);
1177 }
1178 addPass(&DeadMachineInstructionElimID);
1179 addPass(createSIShrinkInstructionsPass());
1180}
1181
1182bool GCNPassConfig::addILPOpts() {
1183 if (EnableEarlyIfConversion)
1184 addPass(&EarlyIfConverterID);
1185
1186 TargetPassConfig::addILPOpts();
1187 return false;
1188}
1189
1190bool GCNPassConfig::addInstSelector() {
1191 AMDGPUPassConfig::addInstSelector();
1192 addPass(&SIFixSGPRCopiesID);
1193 addPass(createSILowerI1CopiesPass());
1194 return false;
1195}
1196
1197bool GCNPassConfig::addIRTranslator() {
1198 addPass(new IRTranslator(getOptLevel()));
1199 return false;
1200}
1201
1202void GCNPassConfig::addPreLegalizeMachineIR() {
1203 bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1204 addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
1205 addPass(new Localizer());
1206}
1207
1208bool GCNPassConfig::addLegalizeMachineIR() {
1209 addPass(new Legalizer());
1210 return false;
1211}
1212
1213void GCNPassConfig::addPreRegBankSelect() {
1214 bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1215 addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
1216}
1217
1218bool GCNPassConfig::addRegBankSelect() {
1219 addPass(new AMDGPURegBankSelect());
1220 return false;
1221}
1222
1223void GCNPassConfig::addPreGlobalInstructionSelect() {
1224 bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1225 addPass(createAMDGPURegBankCombiner(IsOptNone));
1226}
1227
1228bool GCNPassConfig::addGlobalInstructionSelect() {
1229 addPass(new InstructionSelect(getOptLevel()));
1230 return false;
1231}
1232
1233void GCNPassConfig::addPreRegAlloc() {
1234 if (LateCFGStructurize) {
1235 addPass(createAMDGPUMachineCFGStructurizerPass());
1236 }
1237}
1238
1239void GCNPassConfig::addFastRegAlloc() {
1240 // FIXME: We have to disable the verifier here because of PHIElimination +
1241 // TwoAddressInstructions disabling it.
1242
1243 // This must be run immediately after phi elimination and before
1244 // TwoAddressInstructions, otherwise the processing of the tied operand of
1245 // SI_ELSE will introduce a copy of the tied operand source after the else.
1246 insertPass(&PHIEliminationID, &SILowerControlFlowID);
1247
1248 insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
1249 insertPass(&TwoAddressInstructionPassID, &SIPreAllocateWWMRegsID);
1250
1251 TargetPassConfig::addFastRegAlloc();
1252}
1253
1254void GCNPassConfig::addOptimizedRegAlloc() {
1255 // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
1256 // instructions that cause scheduling barriers.
1257 insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
1258 insertPass(&MachineSchedulerID, &SIPreAllocateWWMRegsID);
1259
1260 if (OptExecMaskPreRA)
1261 insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
1262
1263 if (isPassEnabled(EnablePreRAOptimizations))
1264 insertPass(&RenameIndependentSubregsID, &GCNPreRAOptimizationsID);
1265
1266 // This is not an essential optimization and it has a noticeable impact on
1267 // compilation time, so we only enable it from O2.
1268 if (TM->getOptLevel() > CodeGenOpt::Less)
1269 insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
1270
1271 // FIXME: when an instruction has a killed operand and the instruction is
1272 // inside a bundle, it seems that only the BUNDLE instruction appears as the
1273 // kill of the register in LiveVariables. This triggers a failure in the
1274 // verifier; we should fix it and enable the verifier.
1275 if (OptVGPRLiveRange)
1276 insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeID);
1277 // This must be run immediately after phi elimination and before
1278 // TwoAddressInstructions, otherwise the processing of the tied operand of
1279 // SI_ELSE will introduce a copy of the tied operand source after the else.
1280 insertPass(&PHIEliminationID, &SILowerControlFlowID);
1281
1282 if (EnableDCEInRA)
1283 insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);
1284
1285 TargetPassConfig::addOptimizedRegAlloc();
1286}
1287
1288bool GCNPassConfig::addPreRewrite() {
1289 if (EnableRegReassign)
1290 addPass(&GCNNSAReassignID);
1291 return true;
1292}
1293
1294FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
1295 // Initialize the global default.
1296 llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
1297 initializeDefaultSGPRRegisterAllocatorOnce);
1298
1299 RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
1300 if (Ctor != useDefaultRegisterAllocator)
1301 return Ctor();
1302
1303 if (Optimized)
1304 return createGreedyRegisterAllocator(onlyAllocateSGPRs);
1305
1306 return createFastRegisterAllocator(onlyAllocateSGPRs, false);
1307}
1308
1309FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1310 // Initialize the global default.
1311 llvm::call_once(InitializeDefaultVGPRRegisterAllocatorFlag,
1312 initializeDefaultVGPRRegisterAllocatorOnce);
1313
1314 RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
1315 if (Ctor != useDefaultRegisterAllocator)
1316 return Ctor();
1317
1318 if (Optimized)
1319 return createGreedyVGPRRegisterAllocator();
1320
1321 return createFastVGPRRegisterAllocator();
1322}
1323
1324FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
1325 llvm_unreachable("should not be used");
1326}
1327
1328static const char RegAllocOptNotSupportedMessage[] =
1329 "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
1330
1331bool GCNPassConfig::addRegAssignAndRewriteFast() {
1332 if (!usingDefaultRegAlloc())
1333 report_fatal_error(RegAllocOptNotSupportedMessage);
1334
1335 addPass(createSGPRAllocPass(false));
1336
1337 // Equivalent of PEI for SGPRs.
1338 addPass(&SILowerSGPRSpillsID);
1339
1340 addPass(createVGPRAllocPass(false));
1341 return true;
1342}
1343
1344bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1345 if (!usingDefaultRegAlloc())
1346 report_fatal_error(RegAllocOptNotSupportedMessage);
1347
1348 addPass(createSGPRAllocPass(true));
1349
1350 // Commit allocated register changes. This is mostly necessary because too
1351 // many things rely on the use lists of the physical registers, such as the
1352 // verifier. This is only necessary with allocators which use LiveIntervals,
1353 // since FastRegAlloc does the replacements itself.
1354 addPass(createVirtRegRewriter(false));
1355
1356 // Equivalent of PEI for SGPRs.
1357 addPass(&SILowerSGPRSpillsID);
1358
1359 addPass(createVGPRAllocPass(true));
1360
1361 addPreRewrite();
1362 addPass(&VirtRegRewriterID);
1363
1364 return true;
1365}
1366
1367void GCNPassConfig::addPostRegAlloc() {
1368 addPass(&SIFixVGPRCopiesID);
1369 if (getOptLevel() > CodeGenOpt::None)
1370 addPass(&SIOptimizeExecMaskingID);
1371 TargetPassConfig::addPostRegAlloc();
1372}
1373
1374void GCNPassConfig::addPreSched2() {
1375 if (TM->getOptLevel() > CodeGenOpt::None)
1376 addPass(createSIShrinkInstructionsPass());
1377 addPass(&SIPostRABundlerID);
1378}
1379
1380void GCNPassConfig::addPreEmitPass() {
1381 if (isPassEnabled(EnableVOPD, CodeGenOpt::Less))
1382 addPass(&GCNCreateVOPDID);
1383 addPass(createSIMemoryLegalizerPass());
1384 addPass(createSIInsertWaitcntsPass());
1385
1386 addPass(createSIModeRegisterPass());
1387
1388 if (getOptLevel() > CodeGenOpt::None)
1389 addPass(&SIInsertHardClausesID);
1390
1391 addPass(&SILateBranchLoweringPassID);
1392 if (isPassEnabled(EnableSetWavePriority, CodeGenOpt::Less))
1393 addPass(createAMDGPUSetWavePriorityPass());
1394 if (getOptLevel() > CodeGenOpt::None)
1395 addPass(&SIPreEmitPeepholeID);
1396 // The hazard recognizer that runs as part of the post-ra scheduler does not
1397 // guarantee to be able to handle all hazards correctly. This is because if there
1398 // are multiple scheduling regions in a basic block, the regions are scheduled
1399 // bottom up, so when we begin to schedule a region we don't know what
1400 // instructions were emitted directly before it.
1401 //
1402 // Here we add a stand-alone hazard recognizer pass which can handle all
1403 // cases.
1404 addPass(&PostRAHazardRecognizerID);
1405
1406 if (getOptLevel() > CodeGenOpt::Less)
1407 addPass(&AMDGPUReleaseVGPRsID);
1408
1409 if (isPassEnabled(EnableInsertDelayAlu, CodeGenOpt::Less))
1410 addPass(&AMDGPUInsertDelayAluID);
1411
1412 addPass(&BranchRelaxationPassID);
1413}
1414
1415TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
1416 return new GCNPassConfig(*this, PM);
1417}
1418
1419MachineFunctionInfo *GCNTargetMachine::createMachineFunctionInfo(
1420 BumpPtrAllocator &Allocator, const Function &F,
1421 const TargetSubtargetInfo *STI) const {
1422 return SIMachineFunctionInfo::create<SIMachineFunctionInfo>(
1423 Allocator, F, static_cast<const GCNSubtarget *>(STI));
1424}
1425
1426yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const {
1427 return new yaml::SIMachineFunctionInfo();
1428}
1429
1430yaml::MachineFunctionInfo *
1431GCNTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
1432 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1433 return new yaml::SIMachineFunctionInfo(
1434 *MFI, *MF.getSubtarget<GCNSubtarget>().getRegisterInfo(), MF);
1435}
1436
1437bool GCNTargetMachine::parseMachineFunctionInfo(
1438 const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
1439 SMDiagnostic &Error, SMRange &SourceRange) const {
1440 const yaml::SIMachineFunctionInfo &YamlMFI =
1441 static_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
1442 MachineFunction &MF = PFS.MF;
1443 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1444
1445 if (MFI->initializeBaseYamlFields(YamlMFI, MF, PFS, Error, SourceRange))
1446 return true;
1447
1448 if (MFI->Occupancy == 0) {
1449 // Fixup the subtarget dependent default value.
1450 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1451 MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
1452 }
1453
1454 auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
1455 Register TempReg;
1456 if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
1457 SourceRange = RegName.SourceRange;
1458 return true;
1459 }
1460 RegVal = TempReg;
1461
1462 return false;
1463 };
1464
1465 auto parseOptionalRegister = [&](const yaml::StringValue &RegName,
1466 Register &RegVal) {
1467 return !RegName.Value.empty() && parseRegister(RegName, RegVal);
1468 };
1469
1470 if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
1471 return true;
1472
1473 auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
1474 // Create a diagnostic for the register string literal.
1475 const MemoryBuffer &Buffer =
1476 *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
1477 Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1478 RegName.Value.size(), SourceMgr::DK_Error,
1479 "incorrect register class for field", RegName.Value,
1480 std::nullopt, std::nullopt);
1481 SourceRange = RegName.SourceRange;
1482 return true;
1483 };
1484
1485 if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1486 parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1487 parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1488 return true;
1489
1490 if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1491 !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
1492 return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1493 }
1494
1495 if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1496 !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1497 return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1498 }
1499
1500 if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1501 !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1502 return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1503 }
1504
1505 for (const auto &YamlReg : YamlMFI.WWMReservedRegs) {
1506 Register ParsedReg;
1507 if (parseRegister(YamlReg, ParsedReg))
1508 return true;
1509
1510 MFI->reserveWWMRegister(ParsedReg);
1511 }
1512
1513 auto parseAndCheckArgument = [&](const std::optional<yaml::SIArgument> &A,
1514 const TargetRegisterClass &RC,
1515 ArgDescriptor &Arg, unsigned UserSGPRs,
1516 unsigned SystemSGPRs) {
1517 // Skip parsing if it's not present.
1518 if (!A)
1519 return false;
1520
1521 if (A->IsRegister) {
1522 Register Reg;
1523 if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
1524 SourceRange = A->RegisterName.SourceRange;
1525 return true;
1526 }
1527 if (!RC.contains(Reg))
1528 return diagnoseRegisterClass(A->RegisterName);
1529 Arg = ArgDescriptor::createRegister(Reg);
1530 } else
1531 Arg = ArgDescriptor::createStack(A->StackOffset);
1532 // Check and apply the optional mask.
1533 if (A->Mask)
1534 Arg = ArgDescriptor::createArg(Arg, *A->Mask);
1535
1536 MFI->NumUserSGPRs += UserSGPRs;
1537 MFI->NumSystemSGPRs += SystemSGPRs;
1538 return false;
1539 };
1540
1541 if (YamlMFI.ArgInfo &&
1542 (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
1543 AMDGPU::SGPR_128RegClass,
1544 MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
1545 parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
1546 AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
1547 2, 0) ||
1548 parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1549 MFI->ArgInfo.QueuePtr, 2, 0) ||
1550 parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
1551 AMDGPU::SReg_64RegClass,
1552 MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
1553 parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
1554 AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
1555 2, 0) ||
1556 parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
1557 AMDGPU::SReg_64RegClass,
1558 MFI->ArgInfo.FlatScratchInit, 2, 0) ||
1559 parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
1560 AMDGPU::SGPR_32RegClass,
1561 MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
1562 parseAndCheckArgument(YamlMFI.ArgInfo->LDSKernelId,
1563 AMDGPU::SGPR_32RegClass,
1564 MFI->ArgInfo.LDSKernelId, 0, 1) ||
1565 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
1566 AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
1567 0, 1) ||
1568 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
1569 AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
1570 0, 1) ||
1571 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
1572 AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
1573 0, 1) ||
1574 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
1575 AMDGPU::SGPR_32RegClass,
1576 MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
1577 parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
1578 AMDGPU::SGPR_32RegClass,
1579 MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
1580 parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
1581 AMDGPU::SReg_64RegClass,
1582 MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
1583 parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
1584 AMDGPU::SReg_64RegClass,
1585 MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
1586 parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
1587 AMDGPU::VGPR_32RegClass,
1588 MFI->ArgInfo.WorkItemIDX, 0, 0) ||
1589 parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
1590 AMDGPU::VGPR_32RegClass,
1591 MFI->ArgInfo.WorkItemIDY, 0, 0) ||
1592 parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
1593 AMDGPU::VGPR_32RegClass,
1594 MFI->ArgInfo.WorkItemIDZ, 0, 0)))
1595 return true;
1596
1597 MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
1598 MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
1599
1600 // FIXME: Move proper support for denormal-fp-math into base MachineFunction
1601 MFI->Mode.FP32Denormals.Input = YamlMFI.Mode.FP32InputDenormals
1602 ? DenormalMode::IEEE
1603 : DenormalMode::PreserveSign;
1604 MFI->Mode.FP32Denormals.Output = YamlMFI.Mode.FP32OutputDenormals
1605 ? DenormalMode::IEEE
1606 : DenormalMode::PreserveSign;
1607
1608 MFI->Mode.FP64FP16Denormals.Input = YamlMFI.Mode.FP64FP16InputDenormals
1609 ? DenormalMode::IEEE
1610 : DenormalMode::PreserveSign;
1611 MFI->Mode.FP64FP16Denormals.Output = YamlMFI.Mode.FP64FP16OutputDenormals
1612 ? DenormalMode::IEEE
1613 : DenormalMode::PreserveSign;
1614
1615 return false;
1616}