LLVM 20.0.0git
AMDGPUAnnotateKernelFeatures.cpp
Go to the documentation of this file.
1//===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass propagates the uniform-work-group-size attribute from
10/// kernels to leaf functions when possible. It also adds additional attributes
11/// to hint ABI lowering optimizations later.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPU.h"
16#include "GCNSubtarget.h"
21
22#define DEBUG_TYPE "amdgpu-annotate-kernel-features"
23
24using namespace llvm;
25
26namespace {
27class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
28private:
29 const TargetMachine *TM = nullptr;
30
31 bool addFeatureAttributes(Function &F);
32
33public:
34 static char ID;
35
36 AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
37
38 bool doInitialization(CallGraph &CG) override;
39 bool runOnSCC(CallGraphSCC &SCC) override;
40
41 StringRef getPassName() const override {
42 return "AMDGPU Annotate Kernel Features";
43 }
44
45 void getAnalysisUsage(AnalysisUsage &AU) const override {
46 AU.setPreservesAll();
48 }
49};
50
51} // end anonymous namespace
52
// Storage for the pass's static ID member; the constructor hands this member
// to the CallGraphSCCPass base class.
53char AMDGPUAnnotateKernelFeatures::ID = 0;
54
// The pass class lives in an anonymous namespace, so its ID is re-exported in
// namespace llvm as a reference bound to the same object.
55char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
56
// Registers the pass via INITIALIZE_PASS(passName, arg, name, cfg, analysis):
// DEBUG_TYPE is the argument string, "Add AMDGPU function attributes" the
// name, and both the cfg and analysis flags are false.
57INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
58 "Add AMDGPU function attributes", false, false)
59
60bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
61 bool HaveStackObjects = false;
62 bool Changed = false;
63 bool HaveCall = false;
64 bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
65
66 for (BasicBlock &BB : F) {
67 for (Instruction &I : BB) {
68 if (isa<AllocaInst>(I)) {
69 HaveStackObjects = true;
70 continue;
71 }
72
73 if (auto *CB = dyn_cast<CallBase>(&I)) {
74 const Function *Callee =
75 dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
76
77 // Note the occurrence of indirect call.
78 if (!Callee) {
79 if (!CB->isInlineAsm())
80 HaveCall = true;
81
82 continue;
83 }
84
85 Intrinsic::ID IID = Callee->getIntrinsicID();
86 if (IID == Intrinsic::not_intrinsic) {
87 HaveCall = true;
88 Changed = true;
89 }
90 }
91 }
92 }
93
94 // TODO: We could refine this to captured pointers that could possibly be
95 // accessed by flat instructions. For now this is mostly a poor way of
96 // estimating whether there are calls before argument lowering.
97 if (!IsFunc && HaveCall) {
98 F.addFnAttr("amdgpu-calls");
99 Changed = true;
100 }
101
102 if (HaveStackObjects) {
103 F.addFnAttr("amdgpu-stack-objects");
104 Changed = true;
105 }
106
107 return Changed;
108}
109
110bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
111 bool Changed = false;
112
113 for (CallGraphNode *I : SCC) {
114 Function *F = I->getFunction();
115 // Ignore functions with graphics calling conventions, these are currently
116 // not allowed to have kernel arguments.
117 if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv()))
118 continue;
119 // Add feature attributes
120 Changed |= addFeatureAttributes(*F);
121 }
122
123 return Changed;
124}
125
126bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
127 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
128 if (!TPC)
129 report_fatal_error("TargetMachine is required");
130
131 TM = &TPC->getTM<TargetMachine>();
132 return false;
133}
134
136 return new AMDGPUAnnotateKernelFeatures();
137}
#define DEBUG_TYPE
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
AMD GCN specific subclass of TargetSubtarget.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
A node in the call graph for a module.
Definition: CallGraph.h:165
virtual bool runOnSCC(CallGraphSCC &SCC)=0
runOnSCC - This method should be implemented by the subclass to perform whatever action is necessary ...
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph.
virtual bool doInitialization(CallGraph &CG)
doInitialization - This method is called before the SCC's of the program has been processed,...
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:71
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool isEntryFunctionCC(CallingConv::ID CC)
bool isGraphics(CallingConv::ID cc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
char & AMDGPUAnnotateKernelFeaturesID
Pass * createAMDGPUAnnotateKernelFeaturesPass()
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167