LLVM  12.0.0git
AMDGPUAnnotateKernelFeatures.cpp
Go to the documentation of this file.
1 //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass adds target attributes to functions which use intrinsics
10 /// which will impact calling convention lowering.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
22 
23 #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
24 
25 using namespace llvm;
26 
27 namespace {
28 
29 class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
30 private:
31  const TargetMachine *TM = nullptr;
33 
34  bool addFeatureAttributes(Function &F);
35  bool processUniformWorkGroupAttribute();
36  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
37 
38 public:
39  static char ID;
40 
41  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
42 
43  bool doInitialization(CallGraph &CG) override;
44  bool runOnSCC(CallGraphSCC &SCC) override;
45 
46  StringRef getPassName() const override {
47  return "AMDGPU Annotate Kernel Features";
48  }
49 
50  void getAnalysisUsage(AnalysisUsage &AU) const override {
51  AU.setPreservesAll();
53  }
54 
55  static bool visitConstantExpr(const ConstantExpr *CE);
56  static bool visitConstantExprsRecursively(
57  const Constant *EntryC,
58  SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
59  bool HasApertureRegs);
60 };
61 
62 } // end anonymous namespace
63 
65 
67 
68 INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
69  "Add AMDGPU function attributes", false, false)
70 
71 
72 // The queue ptr is only needed when casting to flat, not from it.
73 static bool castRequiresQueuePtr(unsigned SrcAS) {
74  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
75 }
76 
77 static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
79 }
80 
81 static bool isDSAddress(const Constant *C) {
82  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
83  if (!GV)
84  return false;
85  unsigned AS = GV->getAddressSpace();
87 }
88 
89 bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
90  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
91  unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
92  return castRequiresQueuePtr(SrcAS);
93  }
94 
95  return false;
96 }
97 
98 bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
99  const Constant *EntryC,
100  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
101  bool IsFunc, bool HasApertureRegs) {
102 
103  if (!ConstantExprVisited.insert(EntryC).second)
104  return false;
105 
107  Stack.push_back(EntryC);
108 
109  while (!Stack.empty()) {
110  const Constant *C = Stack.pop_back_val();
111 
112  // We need to trap on DS globals in non-entry functions.
113  if (IsFunc && isDSAddress(C))
114  return true;
115 
116  // Check this constant expression.
117  if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
118  if (!HasApertureRegs && visitConstantExpr(CE))
119  return true;
120  }
121 
122  // Visit all sub-expressions.
123  for (const Use &U : C->operands()) {
124  const auto *OpC = dyn_cast<Constant>(U);
125  if (!OpC)
126  continue;
127 
128  if (!ConstantExprVisited.insert(OpC).second)
129  continue;
130 
131  Stack.push_back(OpC);
132  }
133  }
134 
135  return false;
136 }
137 
138 // We do not need to note the x workitem or workgroup id because they are always
139 // initialized.
140 //
141 // TODO: We should not add the attributes if the known compile time workgroup
142 // size is 1 for y/z.
144  bool &NonKernelOnly,
145  bool &IsQueuePtr) {
146  switch (ID) {
147  case Intrinsic::amdgcn_workitem_id_x:
148  NonKernelOnly = true;
149  return "amdgpu-work-item-id-x";
150  case Intrinsic::amdgcn_workgroup_id_x:
151  NonKernelOnly = true;
152  return "amdgpu-work-group-id-x";
153  case Intrinsic::amdgcn_workitem_id_y:
154  case Intrinsic::r600_read_tidig_y:
155  return "amdgpu-work-item-id-y";
156  case Intrinsic::amdgcn_workitem_id_z:
157  case Intrinsic::r600_read_tidig_z:
158  return "amdgpu-work-item-id-z";
159  case Intrinsic::amdgcn_workgroup_id_y:
160  case Intrinsic::r600_read_tgid_y:
161  return "amdgpu-work-group-id-y";
162  case Intrinsic::amdgcn_workgroup_id_z:
163  case Intrinsic::r600_read_tgid_z:
164  return "amdgpu-work-group-id-z";
165  case Intrinsic::amdgcn_dispatch_ptr:
166  return "amdgpu-dispatch-ptr";
167  case Intrinsic::amdgcn_dispatch_id:
168  return "amdgpu-dispatch-id";
169  case Intrinsic::amdgcn_kernarg_segment_ptr:
170  return "amdgpu-kernarg-segment-ptr";
171  case Intrinsic::amdgcn_implicitarg_ptr:
172  return "amdgpu-implicitarg-ptr";
173  case Intrinsic::amdgcn_queue_ptr:
174  case Intrinsic::amdgcn_is_shared:
175  case Intrinsic::amdgcn_is_private:
176  // TODO: Does not require queue ptr on gfx9+
177  case Intrinsic::trap:
178  case Intrinsic::debugtrap:
179  IsQueuePtr = true;
180  return "amdgpu-queue-ptr";
181  default:
182  return "";
183  }
184 }
185 
186 static bool handleAttr(Function &Parent, const Function &Callee,
187  StringRef Name) {
188  if (Callee.hasFnAttribute(Name)) {
189  Parent.addFnAttr(Name);
190  return true;
191  }
192  return false;
193 }
194 
195 static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
196  bool &NeedQueuePtr) {
197  // X ids unnecessarily propagated to kernels.
198  static constexpr StringLiteral AttrNames[] = {
199  "amdgpu-work-item-id-x", "amdgpu-work-item-id-y",
200  "amdgpu-work-item-id-z", "amdgpu-work-group-id-x",
201  "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
202  "amdgpu-dispatch-ptr", "amdgpu-dispatch-id",
203  "amdgpu-implicitarg-ptr"};
204 
205  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
206  NeedQueuePtr = true;
207 
208  for (StringRef AttrName : AttrNames)
209  handleAttr(Parent, Callee, AttrName);
210 }
211 
212 bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
213  bool Changed = false;
214 
215  for (auto *Node : reverse(NodeList)) {
216  Function *Caller = Node->getFunction();
217 
218  for (auto I : *Node) {
219  Function *Callee = std::get<1>(I)->getFunction();
220  if (Callee)
221  Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
222  }
223  }
224 
225  return Changed;
226 }
227 
228 bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
229  Function &Caller, Function &Callee) {
230 
231  // Check for externally defined function
232  if (!Callee.hasExactDefinition()) {
233  Callee.addFnAttr("uniform-work-group-size", "false");
234  if (!Caller.hasFnAttribute("uniform-work-group-size"))
235  Caller.addFnAttr("uniform-work-group-size", "false");
236 
237  return true;
238  }
239  // Check if the Caller has the attribute
240  if (Caller.hasFnAttribute("uniform-work-group-size")) {
241  // Check if the value of the attribute is true
242  if (Caller.getFnAttribute("uniform-work-group-size")
243  .getValueAsString().equals("true")) {
244  // Propagate the attribute to the Callee, if it does not have it
245  if (!Callee.hasFnAttribute("uniform-work-group-size")) {
246  Callee.addFnAttr("uniform-work-group-size", "true");
247  return true;
248  }
249  } else {
250  Callee.addFnAttr("uniform-work-group-size", "false");
251  return true;
252  }
253  } else {
254  // If the attribute is absent, set it as false
255  Caller.addFnAttr("uniform-work-group-size", "false");
256  Callee.addFnAttr("uniform-work-group-size", "false");
257  return true;
258  }
259  return false;
260 }
261 
// Scans the body of \p F and attaches the "amdgpu-*" attributes describing
// which implicit arguments it needs: work-item/work-group IDs and other
// attributes copied from non-intrinsic callees, the queue pointer (from
// intrinsics, addrspacecasts, or constant expressions), plus "amdgpu-calls"
// and "amdgpu-stack-objects" markers. Returns true if any attribute was
// added.
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
  // With aperture registers, flat casts don't need the queue pointer.
  bool HasApertureRegs = ST.hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool HaveStackObjects = false;
  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  // "Func" here means a non-entry (callable) function, as opposed to a
  // kernel/entry point.
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      // Any alloca means the function has stack objects.
      if (isa<AllocaInst>(I)) {
        HaveStackObjects = true;
        continue;
      }

      if (auto *CB = dyn_cast<CallBase>(&I)) {
        const Function *Callee =
            dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());

        // TODO: Do something with indirect calls.
        if (!Callee) {
          if (!CB->isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          // Real call: inherit whatever implicit arguments the callee needs.
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          bool NonKernelOnly = false;

          // Kernels get the kernarg segment pointer attribute directly.
          if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
            F.addFnAttr("amdgpu-kernarg-segment-ptr");
          } else {
            StringRef AttrName = intrinsicToAttrName(IID, NonKernelOnly,
                                                     NeedQueuePtr);
            // Skip non-kernel-only attributes on kernels.
            if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
              F.addFnAttr(AttrName);
              Changed = true;
            }
          }
        }
      }

      // Once the queue pointer is known to be needed (or a kernel can use
      // aperture registers instead), the remaining checks are redundant.
      if (NeedQueuePtr || (!IsFunc && HasApertureRegs))
        continue;

      // Flat casts from local/private require the queue pointer when there
      // are no aperture registers.
      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (!HasApertureRegs && castRequiresQueuePtr(ASC)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      // Constant operands may hide addrspacecast expressions or DS globals.
      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc,
                                          HasApertureRegs)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (!IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-calls");
    Changed = true;
  }

  if (HaveStackObjects) {
    F.addFnAttr("amdgpu-stack-objects");
    Changed = true;
  }

  return Changed;
}
356 
357 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
358  bool Changed = false;
359 
360  for (CallGraphNode *I : SCC) {
361  // Build a list of CallGraphNodes from most number of uses to least
362  if (I->getNumReferences())
363  NodeList.push_back(I);
364  else {
365  processUniformWorkGroupAttribute();
366  NodeList.clear();
367  }
368 
369  Function *F = I->getFunction();
370  // Add feature attributes
371  if (!F || F->isDeclaration())
372  continue;
373  Changed |= addFeatureAttributes(*F);
374  }
375 
376  return Changed;
377 }
378 
379 bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
380  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
381  if (!TPC)
382  report_fatal_error("TargetMachine is required");
383 
384  TM = &TPC->getTM<TargetMachine>();
385  return false;
386 }
387 
389  return new AMDGPUAnnotateKernelFeatures();
390 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
uint64_t CallInst * C
unsigned getSrcAddressSpace() const
Returns the address space of the pointer operand.
AMDGPU specific subclass of TargetSubtarget.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Address space for local memory.
Definition: AMDGPU.h:369
F(f)
A node in the call graph for a module.
Definition: CallGraph.h:167
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph.
This class represents a conversion between pointers from one address space to another.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
Pass * createAMDGPUAnnotateKernelFeaturesPass()
Windows NT (Windows on ARM)
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC)
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:156
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:936
Address space for private memory.
Definition: AMDGPU.h:370
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:872
static void copyFeaturesToFunction(Function &Parent, const Function &Callee, bool &NeedQueuePtr)
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
This is an important base class in LLVM.
Definition: Constant.h:41
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
bool isEntryFunctionCC(CallingConv::ID CC)
char & AMDGPUAnnotateKernelFeaturesID
Represent the analysis usage information of a pass.
unsigned getAddressSpace() const
Definition: Globals.cpp:112
static bool isDSAddress(const Constant *C)
op_range operands()
Definition: User.h:242
Address space for region memory. (GDS)
Definition: AMDGPU.h:366
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, "Add AMDGPU function attributes", false, false) static bool castRequiresQueuePtr(unsigned SrcAS)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:442
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1116
amdgpu Simplify well known AMD library false FunctionCallee Callee
void setPreservesAll()
Set by analyses that do not transform their input at all.
static bool handleAttr(Function &Parent, const Function &Callee, StringRef Name)
SmallVector< NodeAddr< NodeBase * >, 4 > NodeList
Definition: RDFGraph.h:512
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
#define I(x, y, z)
Definition: MD5.cpp:59
#define DEBUG_TYPE
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:245
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:338
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr)