LLVM 19.0.0git
AMDGPU.h
Go to the documentation of this file.
1//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7/// \file
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12
13#include "llvm/IR/PassManager.h"
14#include "llvm/Pass.h"
17
18namespace llvm {
19
20class AMDGPUTargetMachine;
21class TargetMachine;
22
23// GlobalISel passes
30
32
33// SI Passes
52
64
65struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
68};
69
71 : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> {
74
75private:
76 TargetMachine &TM;
77};
78
79struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
81};
82
84
87
89
95
96// The DPP and Iterative options enable the atomic optimizer with the given
97// strategy, whereas None disables the atomic optimizer.
98enum class ScanOptions { DPP, Iterative, None };
99FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy);
101extern char &AMDGPUAtomicOptimizerID;
102
106
110
114
116 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
118};
119
123
125 : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
127};
128
131
132struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
135
137};
138
140extern char &AMDGPURewriteOutArgumentsID;
141
143extern char &GCNDPPCombineID;
144
146extern char &SIFoldOperandsID;
147
149extern char &SIPeepholeSDWAID;
150
152extern char &SIShrinkInstructionsID;
153
155extern char &SIFixSGPRCopiesID;
156
158extern char &SIFixVGPRCopiesID;
159
161extern char &SILowerWWMCopiesID;
162
164extern char &SILowerI1CopiesID;
165
168
170extern char &AMDGPUMarkLastScratchLoadID;
171
173extern char &SILowerSGPRSpillsID;
174
176extern char &SILoadStoreOptimizerID;
177
179extern char &SIWholeQuadModeID;
180
182extern char &SILowerControlFlowID;
183
185extern char &SIPreEmitPeepholeID;
186
188extern char &SILateBranchLoweringPassID;
189
191extern char &SIOptimizeExecMaskingID;
192
194extern char &SIPreAllocateWWMRegsID;
195
198
200extern char &AMDGPUPerfHintAnalysisID;
201
203extern char &GCNRegPressurePrinterID;
204
205// Passes common to R600 and SI
208extern char &AMDGPUPromoteAllocaID;
209
213
214struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
217
218private:
219 TargetMachine &TM;
220};
221
223 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
226
227private:
228 TargetMachine &TM;
229};
230
231struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
233 : TM(TM), ScanImpl(ScanImpl) {}
235
236private:
237 TargetMachine &TM;
238 ScanOptions ScanImpl;
239};
240
243ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
244
245struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
246 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
248
249private:
250 bool GlobalOpt;
251};
252
254 : public PassInfoMixin<AMDGPUCodeGenPreparePass> {
255private:
256 TargetMachine &TM;
257
258public:
261};
262
264 : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
265private:
266 TargetMachine &TM;
267
268public:
271};
272
273class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
274private:
275 TargetMachine &TM;
276
277public:
280};
281
283
287
290
292 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
294};
295
298extern char &AMDGPUUnifyMetadataID;
299
300struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
302};
303
306
308extern char &SIOptimizeVGPRLiveRangeID;
309
312
314extern char &AMDGPUCodeGenPrepareID;
315
318
320extern char &AMDGPULateCodeGenPrepareID;
321
325
327 : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> {
328public:
331};
332
334extern char &SIAnnotateControlFlowPassID;
335
337extern char &SIMemoryLegalizerID;
338
340extern char &SIModeRegisterID;
341
343extern char &AMDGPUInsertDelayAluID;
344
346extern char &AMDGPUInsertSingleUseVDSTID;
347
349extern char &SIInsertHardClausesID;
350
352extern char &SIInsertWaitcntsID;
353
355extern char &SIFormMemoryClausesID;
356
358extern char &SIPostRABundlerID;
359
361extern char &GCNCreateVOPDID;
362
365
370
372
376
378extern char &GCNNSAReassignID;
379
381extern char &GCNPreRALongBranchRegID;
382
384extern char &GCNPreRAOptimizationsID;
385
388
390extern char &GCNRewritePartialRegUsesID;
391
392namespace AMDGPU {
400
401// FIXME: Missing constant_32bit
402inline bool isFlatGlobalAddrSpace(unsigned AS) {
403 return AS == AMDGPUAS::GLOBAL_ADDRESS ||
407}
408
409inline bool isExtendedGlobalAddrSpace(unsigned AS) {
413}
414
/// Reports whether two address spaces may alias, by looking the pair up in
/// the static 10x10 table ASAliasRules as ASAliasRules[AS1][AS2].
///
/// \param AS1 numeric value of the first address space (row index).
/// \param AS2 numeric value of the second address space (column index).
/// \returns the table entry for the pair; see the note on the early
///          `return true` below.
415static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
// The table below has exactly 10 rows and 10 columns, so the largest known
// address space value must not exceed 9.
416 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range");
417
// NOTE(review): the statement that guards this early return (source line 418)
// is not present in this listing. Presumably it covers address space values
// above MAX_AMDGPU_ADDRESS, which the table cannot index - confirm against the
// original header.
419 return true;
420
421 // This array is indexed by the address space enum values 0 through 9.
// As written, the table is symmetric (entry [A][B] equals entry [B][A]).
// On the diagonal, the Constant and Constant 32-bit entries are false; every
// other address space is marked as aliasing itself.
422 // clang-format off
423 static const bool ASAliasRules[10][10] = {
424 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */
425 /* Flat */ {true, true, false, true, true, true, true, true, true, true},
426 /* Global */ {true, true, false, false, true, false, true, true, true, true},
427 /* Region */ {false, false, true, false, false, false, false, false, false, false},
428 /* Group */ {true, false, false, true, false, false, false, false, false, false},
429 /* Constant */ {true, true, false, false, false, false, true, true, true, true},
430 /* Private */ {true, false, false, false, false, true, false, false, false, false},
431 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true},
432 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true},
433 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true},
434 /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true},
435 };
436 // clang-format on
437
// Row is selected by AS1, column by AS2.
438 return ASAliasRules[AS1][AS2];
439}
440
441}
442
443} // End namespace llvm
444
445#endif
AMDGPU address space definition.
#define F(x, y, z)
Definition: MD5.cpp:55
const char LLVMTargetMachineRef TM
This header defines various interfaces for pass management in LLVM.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
AMDGPUAttributorPass(TargetMachine &TM)
Definition: AMDGPU.h:278
PreservedAnalyses run(Function &, FunctionAnalysisManager &)
AMDGPUCodeGenPreparePass(TargetMachine &TM)
Definition: AMDGPU.h:259
AMDGPULowerKernelArgumentsPass(TargetMachine &TM)
Definition: AMDGPU.h:269
PreservedAnalyses run(Function &, FunctionAnalysisManager &)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:282
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:251
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool isFlatGlobalAddrSpace(unsigned AS)
Definition: AMDGPU.h:402
static bool addrspacesMayAlias(unsigned AS1, unsigned AS2)
Definition: AMDGPU.h:415
@ TI_SCRATCH_RSRC_DWORD1
Definition: AMDGPU.h:396
@ TI_SCRATCH_RSRC_DWORD3
Definition: AMDGPU.h:398
@ TI_SCRATCH_RSRC_DWORD0
Definition: AMDGPU.h:395
@ TI_SCRATCH_RSRC_DWORD2
Definition: AMDGPU.h:397
@ TI_CONSTDATA_START
Definition: AMDGPU.h:394
bool isExtendedGlobalAddrSpace(unsigned AS)
Definition: AMDGPU.h:409
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeSIFormMemoryClausesPass(PassRegistry &)
char & SIPreAllocateWWMRegsID
ScanOptions
Definition: AMDGPU.h:98
ImmutablePass * createAMDGPUAAWrapperPass()
FunctionPass * createSIPreAllocateWWMRegsPass()
FunctionPass * createAMDGPUSetWavePriorityPass()
char & AMDGPUCtorDtorLoweringLegacyPassID
void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &)
void initializeGCNCreateVOPDPass(PassRegistry &)
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
char & AMDGPUAnnotateKernelFeaturesID
char & GCNPreRAOptimizationsID
void initializeGCNPreRAOptimizationsPass(PassRegistry &)
void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &)
void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &)
char & SIMemoryLegalizerID
void initializeAMDGPUAttributorLegacyPass(PassRegistry &)
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
char & SIPostRABundlerID
FunctionPass * createSIModeRegisterPass()
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
void initializeSIModeRegisterPass(PassRegistry &)
ModulePass * createAMDGPUCtorDtorLoweringLegacyPass()
void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &)
char & AMDGPUImageIntrinsicOptimizerID
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &)
void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &)
char & AMDGPUPromoteKernelArgumentsID
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
char & GCNRewritePartialRegUsesID
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
void initializeSIShrinkInstructionsPass(PassRegistry &)
char & SIFoldOperandsID
FunctionPass * createAMDGPURewriteOutArgumentsPass()
void initializeGCNPreRALongBranchRegPass(PassRegistry &)
char & SILowerI1CopiesID
char & AMDGPUResourceUsageAnalysisID
char & SILoadStoreOptimizerID
FunctionPass * createSIWholeQuadModePass()
ModulePass * createAMDGPULowerKernelAttributesPass()
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
FunctionPass * createSIPeepholeSDWAPass()
void initializeSIPreEmitPeepholePass(PassRegistry &)
FunctionPass * createSILoadStoreOptimizerPass()
char & SILowerWWMCopiesID
void initializeSIFixVGPRCopiesPass(PassRegistry &)
ModulePass * createAMDGPUUnifyMetadataPass()
void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry &)
void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &)
void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &)
void initializeSILowerWWMCopiesPass(PassRegistry &)
void initializeGCNNSAReassignPass(PassRegistry &)
void initializeSIInsertWaitcntsPass(PassRegistry &)
char & AMDGPUInsertSingleUseVDSTID
char & SIFormMemoryClausesID
char & AMDGPURemoveIncompatibleFunctionsID
void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &)
void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &)
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
void initializeSILoadStoreOptimizerPass(PassRegistry &)
void initializeSILateBranchLoweringPass(PassRegistry &)
void initializeSIPeepholeSDWAPass(PassRegistry &)
FunctionPass * createAMDGPUPromoteAllocaToVector()
char & AMDGPULateCodeGenPrepareID
char & AMDGPUUnifyDivergentExitNodesID
char & SIInsertWaitcntsID
FunctionPass * createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy)
char & AMDGPUPrintfRuntimeBindingID
char & GCNNSAReassignID
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
char & AMDGPUPerfHintAnalysisID
char & SILowerSGPRSpillsID
char & SILateBranchLoweringPassID
char & SIModeRegisterID
FunctionPass * createGCNPreRAOptimizationsPass()
FunctionPass * createSIShrinkInstructionsPass()
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
void initializeSIPostRABundlerPass(PassRegistry &)
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
Pass * createAMDGPUAttributorLegacyPass()
void initializeSIWholeQuadModePass(PassRegistry &)
FunctionPass * createAMDGPULowerKernelArgumentsPass()
char & AMDGPUInsertDelayAluID
Pass * createAMDGPUAnnotateKernelFeaturesPass()
char & SIOptimizeVGPRLiveRangeID
char & SIOptimizeExecMaskingPreRAID
char & AMDGPULowerModuleLDSLegacyPassID
void initializeSIInsertHardClausesPass(PassRegistry &)
FunctionPass * createSIPostRABundlerPass()
FunctionPass * createSIFormMemoryClausesPass()
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
Pass * createAMDGPUStructurizeCFGPass()
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
void initializeSIAnnotateControlFlowPass(PassRegistry &)
ModulePass * createAMDGPUPrintfRuntimeBinding()
void initializeSIMemoryLegalizerPass(PassRegistry &)
char & AMDGPUUnifyMetadataID
void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &)
FunctionPass * createAMDGPUAnnotateUniformValues()
ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
FunctionPass * createAMDGPUPromoteAlloca()
char & SIPreEmitPeepholeID
ModulePass * createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *)
FunctionPass * createSILowerI1CopiesPass()
void initializeGCNRegPressurePrinterPass(PassRegistry &)
char & AMDGPURewriteOutArgumentsID
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
FunctionPass * createAMDGPUCodeGenPreparePass()
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific DAG.
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
char & AMDGPUMachineCFGStructurizerID
char & AMDGPULowerKernelAttributesID
@ None
Not a recurrence.
char & GCNDPPCombineID
char & GCNRegPressurePrinterID
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
char & AMDGPUPromoteAllocaID
FunctionPass * createSIFoldOperandsPass()
char & SIWholeQuadModeID
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &)
ImmutablePass * createAMDGPUExternalAAWrapperPass()
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
FunctionPass * createAMDGPURewriteUndefForPHILegacyPass()
void initializeSILowerSGPRSpillsPass(PassRegistry &)
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
char & SIInsertHardClausesID
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &)
void initializeSIFixSGPRCopiesPass(PassRegistry &)
char & GCNCreateVOPDID
void initializeSILowerI1CopiesPass(PassRegistry &)
char & SILowerControlFlowID
char & AMDGPUAtomicOptimizerID
char & SIAnnotateControlFlowPassID
FunctionPass * createLowerWWMCopiesPass()
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
char & AMDGPUAnnotateUniformValuesPassID
FunctionPass * createAMDGPUGlobalISelDivergenceLoweringPass()
FunctionPass * createSIMemoryLegalizerPass()
void initializeSIFoldOperandsPass(PassRegistry &)
void initializeSILowerControlFlowPass(PassRegistry &)
char & SIPeepholeSDWAID
char & AMDGPUOpenCLEnqueuedBlockLoweringID
char & SIFixVGPRCopiesID
void initializeAMDGPURegBankSelectPass(PassRegistry &)
char & AMDGPURewriteUndefForPHILegacyPassID
FunctionPass * createSIOptimizeVGPRLiveRangePass()
FunctionPass * createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
FunctionPass * createSIInsertWaitcntsPass()
void initializeGCNDPPCombinePass(PassRegistry &)
FunctionPass * createSIOptimizeExecMaskingPreRAPass()
FunctionPass * createGCNDPPCombinePass()
FunctionPass * createAMDGPULateCodeGenPreparePass()
char & AMDGPUMarkLastScratchLoadID
FunctionPass * createSIFixSGPRCopiesPass()
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
char & AMDGPUPromoteAllocaToVectorID
void initializeAMDGPUInsertDelayAluPass(PassRegistry &)
char & SIOptimizeExecMaskingID
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
FunctionPass * createSIFixControlFlowLiveIntervalsPass()
char & SIFixSGPRCopiesID
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
char & AMDGPULowerKernelArgumentsID
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
char & AMDGPUCodeGenPrepareID
void initializeAMDGPUSetWavePriorityPass(PassRegistry &)
char & SIShrinkInstructionsID
FunctionPass * createAMDGPUPromoteKernelArgumentsPass()
char & GCNPreRALongBranchRegID
void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &)
AMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Definition: AMDGPU.h:246
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl)
Definition: AMDGPU.h:232
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM)
Definition: AMDGPU.h:72
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_)
Definition: AMDGPU.h:134
const AMDGPUTargetMachine & TM
Definition: AMDGPU.h:133
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
AMDGPUPromoteAllocaPass(TargetMachine &TM)
Definition: AMDGPU.h:215
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM)
Definition: AMDGPU.h:224
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:91