LLVM 19.0.0git
AMDGPU.h
Go to the documentation of this file.
1//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7/// \file
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12
13#include "llvm/IR/PassManager.h"
14#include "llvm/Pass.h"
17
18namespace llvm {
19
20class AMDGPUTargetMachine;
21class TargetMachine;
22
23// GlobalISel passes
30
32
33// SI Passes
52
65
66struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
69};
70
72 : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> {
75
76private:
77 TargetMachine &TM;
78};
79
80struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
82};
83
85
88
90
96
97// The DPP and Iterative options enable the atomic optimizer with the given
98// scan strategy, whereas None disables the atomic optimizer.
99enum class ScanOptions { DPP, Iterative, None };
100FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy);
102extern char &AMDGPUAtomicOptimizerID;
103
107
111
115
117 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
119};
120
124
126 : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
128};
129
132
133struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
136
138};
139
142
144 : PassInfoMixin<AMDGPULowerBufferFatPointersPass> {
147
148private:
149 const TargetMachine &TM;
150};
151
153extern char &AMDGPURewriteOutArgumentsID;
154
156extern char &GCNDPPCombineID;
157
159extern char &SIFoldOperandsID;
160
162extern char &SIPeepholeSDWAID;
163
165extern char &SIShrinkInstructionsID;
166
168extern char &SIFixSGPRCopiesID;
169
171extern char &SIFixVGPRCopiesID;
172
174extern char &SILowerWWMCopiesID;
175
177extern char &SILowerI1CopiesID;
178
181
183extern char &AMDGPUMarkLastScratchLoadID;
184
186extern char &SILowerSGPRSpillsID;
187
189extern char &SILoadStoreOptimizerID;
190
192extern char &SIWholeQuadModeID;
193
195extern char &SILowerControlFlowID;
196
198extern char &SIPreEmitPeepholeID;
199
201extern char &SILateBranchLoweringPassID;
202
204extern char &SIOptimizeExecMaskingID;
205
207extern char &SIPreAllocateWWMRegsID;
208
211
213extern char &AMDGPUPerfHintAnalysisID;
214
216extern char &GCNRegPressurePrinterID;
217
218// Passes common to R600 and SI
221extern char &AMDGPUPromoteAllocaID;
222
226
227struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
230
231private:
232 TargetMachine &TM;
233};
234
236 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
239
240private:
241 TargetMachine &TM;
242};
243
244struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
246 : TM(TM), ScanImpl(ScanImpl) {}
248
249private:
250 TargetMachine &TM;
251 ScanOptions ScanImpl;
252};
253
256ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
257
258struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
259 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
261
262private:
263 bool GlobalOpt;
264};
265
267 : public PassInfoMixin<AMDGPUCodeGenPreparePass> {
268private:
269 TargetMachine &TM;
270
271public:
274};
275
277 : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
278private:
279 TargetMachine &TM;
280
281public:
284};
285
286class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
287private:
288 TargetMachine &TM;
289
290public:
293};
294
296
300
303
305 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
307};
308
311extern char &AMDGPUUnifyMetadataID;
312
313struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
315};
316
319
321extern char &SIOptimizeVGPRLiveRangeID;
322
325
327extern char &AMDGPUCodeGenPrepareID;
328
331
333extern char &AMDGPULateCodeGenPrepareID;
334
338
340 : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> {
341public:
344};
345
347extern char &SIAnnotateControlFlowPassID;
348
350extern char &SIMemoryLegalizerID;
351
353extern char &SIModeRegisterID;
354
356extern char &AMDGPUInsertDelayAluID;
357
359extern char &AMDGPUInsertSingleUseVDSTID;
360
362extern char &SIInsertHardClausesID;
363
365extern char &SIInsertWaitcntsID;
366
368extern char &SIFormMemoryClausesID;
369
371extern char &SIPostRABundlerID;
372
374extern char &GCNCreateVOPDID;
375
378
383
385
389
391extern char &GCNNSAReassignID;
392
394extern char &GCNPreRALongBranchRegID;
395
397extern char &GCNPreRAOptimizationsID;
398
401
403extern char &GCNRewritePartialRegUsesID;
404
405namespace AMDGPU {
413
414// FIXME: Missing constant_32bit
415inline bool isFlatGlobalAddrSpace(unsigned AS) {
416 return AS == AMDGPUAS::GLOBAL_ADDRESS ||
420}
421
422inline bool isExtendedGlobalAddrSpace(unsigned AS) {
426}
427
428static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
429 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range");
430
432 return true;
433
434 // This array is indexed by the address space enum values 0 through 9.
435 // clang-format off
436 static const bool ASAliasRules[10][10] = {
437 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */
438 /* Flat */ {true, true, false, true, true, true, true, true, true, true},
439 /* Global */ {true, true, false, false, true, false, true, true, true, true},
440 /* Region */ {false, false, true, false, false, false, false, false, false, false},
441 /* Group */ {true, false, false, true, false, false, false, false, false, false},
442 /* Constant */ {true, true, false, false, false, false, true, true, true, true},
443 /* Private */ {true, false, false, false, false, true, false, false, false, false},
444 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true},
445 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true},
446 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true},
447 /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true},
448 };
449 // clang-format on
450
451 return ASAliasRules[AS1][AS2];
452}
453
454}
455
456} // End namespace llvm
457
458#endif
AMDGPU address space definition.
#define F(x, y, z)
Definition: MD5.cpp:55
const char LLVMTargetMachineRef TM
This header defines various interfaces for pass management in LLVM.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
AMDGPUAttributorPass(TargetMachine &TM)
Definition: AMDGPU.h:291
PreservedAnalyses run(Function &, FunctionAnalysisManager &)
AMDGPUCodeGenPreparePass(TargetMachine &TM)
Definition: AMDGPU.h:272
AMDGPULowerKernelArgumentsPass(TargetMachine &TM)
Definition: AMDGPU.h:282
PreservedAnalyses run(Function &, FunctionAnalysisManager &)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:242
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:282
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:251
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool isFlatGlobalAddrSpace(unsigned AS)
Definition: AMDGPU.h:415
static bool addrspacesMayAlias(unsigned AS1, unsigned AS2)
Definition: AMDGPU.h:428
@ TI_SCRATCH_RSRC_DWORD1
Definition: AMDGPU.h:409
@ TI_SCRATCH_RSRC_DWORD3
Definition: AMDGPU.h:411
@ TI_SCRATCH_RSRC_DWORD0
Definition: AMDGPU.h:408
@ TI_SCRATCH_RSRC_DWORD2
Definition: AMDGPU.h:410
@ TI_CONSTDATA_START
Definition: AMDGPU.h:407
bool isExtendedGlobalAddrSpace(unsigned AS)
Definition: AMDGPU.h:422
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeSIFormMemoryClausesPass(PassRegistry &)
char & SIPreAllocateWWMRegsID
ScanOptions
Definition: AMDGPU.h:99
ImmutablePass * createAMDGPUAAWrapperPass()
FunctionPass * createSIPreAllocateWWMRegsPass()
FunctionPass * createAMDGPUSetWavePriorityPass()
char & AMDGPUCtorDtorLoweringLegacyPassID
void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &)
void initializeGCNCreateVOPDPass(PassRegistry &)
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
char & AMDGPUAnnotateKernelFeaturesID
char & GCNPreRAOptimizationsID
void initializeGCNPreRAOptimizationsPass(PassRegistry &)
void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &)
void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &)
char & SIMemoryLegalizerID
void initializeAMDGPUAttributorLegacyPass(PassRegistry &)
char & SIPostRABundlerID
FunctionPass * createSIModeRegisterPass()
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
ModulePass * createAMDGPULowerBufferFatPointersPass()
void initializeSIModeRegisterPass(PassRegistry &)
ModulePass * createAMDGPUCtorDtorLoweringLegacyPass()
void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &)
char & AMDGPUImageIntrinsicOptimizerID
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &)
void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &)
char & AMDGPUPromoteKernelArgumentsID
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
char & GCNRewritePartialRegUsesID
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
void initializeSIShrinkInstructionsPass(PassRegistry &)
char & SIFoldOperandsID
FunctionPass * createAMDGPURewriteOutArgumentsPass()
void initializeGCNPreRALongBranchRegPass(PassRegistry &)
char & SILowerI1CopiesID
char & AMDGPUResourceUsageAnalysisID
char & SILoadStoreOptimizerID
FunctionPass * createSIWholeQuadModePass()
void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &)
ModulePass * createAMDGPULowerKernelAttributesPass()
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
FunctionPass * createSIPeepholeSDWAPass()
void initializeSIPreEmitPeepholePass(PassRegistry &)
char & AMDGPUPromoteAllocaToVectorID
FunctionPass * createSILoadStoreOptimizerPass()
char & SILowerWWMCopiesID
void initializeSIFixVGPRCopiesPass(PassRegistry &)
ModulePass * createAMDGPUUnifyMetadataPass()
void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry &)
void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &)
void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &)
void initializeSILowerWWMCopiesPass(PassRegistry &)
void initializeGCNNSAReassignPass(PassRegistry &)
void initializeSIInsertWaitcntsPass(PassRegistry &)
char & AMDGPUInsertSingleUseVDSTID
char & SIFormMemoryClausesID
char & AMDGPURemoveIncompatibleFunctionsID
void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &)
void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &)
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
void initializeSILoadStoreOptimizerPass(PassRegistry &)
void initializeSILateBranchLoweringPass(PassRegistry &)
void initializeSIPeepholeSDWAPass(PassRegistry &)
FunctionPass * createAMDGPUPromoteAllocaToVector()
char & AMDGPULateCodeGenPrepareID
char & AMDGPUUnifyDivergentExitNodesID
char & SIInsertWaitcntsID
FunctionPass * createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy)
char & AMDGPUPrintfRuntimeBindingID
char & GCNNSAReassignID
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
char & AMDGPUPerfHintAnalysisID
char & SILowerSGPRSpillsID
char & SILateBranchLoweringPassID
char & SIModeRegisterID
FunctionPass * createGCNPreRAOptimizationsPass()
FunctionPass * createSIShrinkInstructionsPass()
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
void initializeSIPostRABundlerPass(PassRegistry &)
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
Pass * createAMDGPUAttributorLegacyPass()
void initializeSIWholeQuadModePass(PassRegistry &)
FunctionPass * createAMDGPULowerKernelArgumentsPass()
char & AMDGPUInsertDelayAluID
Pass * createAMDGPUAnnotateKernelFeaturesPass()
char & SIOptimizeVGPRLiveRangeID
char & SIOptimizeExecMaskingPreRAID
char & AMDGPULowerModuleLDSLegacyPassID
void initializeSIInsertHardClausesPass(PassRegistry &)
FunctionPass * createSIPostRABundlerPass()
FunctionPass * createSIFormMemoryClausesPass()
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
Pass * createAMDGPUStructurizeCFGPass()
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
char & AMDGPULowerBufferFatPointersID
void initializeSIAnnotateControlFlowPass(PassRegistry &)
ModulePass * createAMDGPUPrintfRuntimeBinding()
void initializeSIMemoryLegalizerPass(PassRegistry &)
char & AMDGPUUnifyMetadataID
void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &)
FunctionPass * createAMDGPUAnnotateUniformValues()
ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
FunctionPass * createAMDGPUPromoteAlloca()
char & SIPreEmitPeepholeID
ModulePass * createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *)
FunctionPass * createSILowerI1CopiesPass()
void initializeGCNRegPressurePrinterPass(PassRegistry &)
char & AMDGPURewriteOutArgumentsID
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
char & AMDGPUPromoteAllocaID
FunctionPass * createAMDGPUCodeGenPreparePass()
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific DAG.
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
char & AMDGPUMachineCFGStructurizerID
char & AMDGPULowerKernelAttributesID
char & GCNDPPCombineID
char & GCNRegPressurePrinterID
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
FunctionPass * createSIFoldOperandsPass()
char & SIWholeQuadModeID
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &)
ImmutablePass * createAMDGPUExternalAAWrapperPass()
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
FunctionPass * createAMDGPURewriteUndefForPHILegacyPass()
void initializeSILowerSGPRSpillsPass(PassRegistry &)
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
char & SIInsertHardClausesID
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &)
void initializeSIFixSGPRCopiesPass(PassRegistry &)
char & GCNCreateVOPDID
void initializeSILowerI1CopiesPass(PassRegistry &)
char & SILowerControlFlowID
char & AMDGPUAtomicOptimizerID
char & SIAnnotateControlFlowPassID
FunctionPass * createLowerWWMCopiesPass()
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
char & AMDGPUAnnotateUniformValuesPassID
FunctionPass * createAMDGPUGlobalISelDivergenceLoweringPass()
FunctionPass * createSIMemoryLegalizerPass()
void initializeSIFoldOperandsPass(PassRegistry &)
void initializeSILowerControlFlowPass(PassRegistry &)
char & SIPeepholeSDWAID
char & AMDGPUOpenCLEnqueuedBlockLoweringID
char & SIFixVGPRCopiesID
void initializeAMDGPURegBankSelectPass(PassRegistry &)
char & AMDGPURewriteUndefForPHILegacyPassID
FunctionPass * createSIOptimizeVGPRLiveRangePass()
FunctionPass * createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
void initializeAMDGPULowerBufferFatPointersPass(PassRegistry &)
FunctionPass * createSIInsertWaitcntsPass()
void initializeGCNDPPCombinePass(PassRegistry &)
FunctionPass * createSIOptimizeExecMaskingPreRAPass()
FunctionPass * createGCNDPPCombinePass()
FunctionPass * createAMDGPULateCodeGenPreparePass()
char & AMDGPUMarkLastScratchLoadID
FunctionPass * createSIFixSGPRCopiesPass()
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
void initializeAMDGPUInsertDelayAluPass(PassRegistry &)
char & SIOptimizeExecMaskingID
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
FunctionPass * createSIFixControlFlowLiveIntervalsPass()
char & SIFixSGPRCopiesID
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
char & AMDGPULowerKernelArgumentsID
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
char & AMDGPUCodeGenPrepareID
void initializeAMDGPUSetWavePriorityPass(PassRegistry &)
char & SIShrinkInstructionsID
FunctionPass * createAMDGPUPromoteKernelArgumentsPass()
char & GCNPreRALongBranchRegID
void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &)
AMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Definition: AMDGPU.h:259
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl)
Definition: AMDGPU.h:245
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM)
Definition: AMDGPU.h:73
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
AMDGPULowerBufferFatPointersPass(const TargetMachine &TM)
Definition: AMDGPU.h:145
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_)
Definition: AMDGPU.h:135
const AMDGPUTargetMachine & TM
Definition: AMDGPU.h:134
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
AMDGPUPromoteAllocaPass(TargetMachine &TM)
Definition: AMDGPU.h:228
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM)
Definition: AMDGPU.h:237
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:68