LLVM  10.0.0svn
AMDGPUAnnotateKernelFeatures.cpp
Go to the documentation of this file.
1 //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass adds target attributes to functions which use intrinsics
10 /// which will impact calling convention lowering.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPUSubtarget.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Triple.h"
24 #include "llvm/IR/CallSite.h"
25 #include "llvm/IR/Constant.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Use.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Support/Casting.h"
38 
39 #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
40 
41 using namespace llvm;
42 
43 namespace {
44 
45 class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
46 private:
47  const TargetMachine *TM = nullptr;
49 
50  bool addFeatureAttributes(Function &F);
51  bool processUniformWorkGroupAttribute();
52  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
53 
54 public:
55  static char ID;
56 
57  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
58 
59  bool doInitialization(CallGraph &CG) override;
60  bool runOnSCC(CallGraphSCC &SCC) override;
61 
62  StringRef getPassName() const override {
63  return "AMDGPU Annotate Kernel Features";
64  }
65 
66  void getAnalysisUsage(AnalysisUsage &AU) const override {
67  AU.setPreservesAll();
69  }
70 
71  static bool visitConstantExpr(const ConstantExpr *CE);
72  static bool visitConstantExprsRecursively(
73  const Constant *EntryC,
74  SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
75 };
76 
77 } // end anonymous namespace
78 
80 
82 
83 INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
84  "Add AMDGPU function attributes", false, false)
85 
86 
87 // The queue ptr is only needed when casting to flat, not from it.
88 static bool castRequiresQueuePtr(unsigned SrcAS) {
89  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
90 }
91 
92 static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
94 }
95 
96 bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
97  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
98  unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
99  return castRequiresQueuePtr(SrcAS);
100  }
101 
102  return false;
103 }
104 
105 bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
106  const Constant *EntryC,
107  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
108 
109  if (!ConstantExprVisited.insert(EntryC).second)
110  return false;
111 
113  Stack.push_back(EntryC);
114 
115  while (!Stack.empty()) {
116  const Constant *C = Stack.pop_back_val();
117 
118  // Check this constant expression.
119  if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
120  if (visitConstantExpr(CE))
121  return true;
122  }
123 
124  // Visit all sub-expressions.
125  for (const Use &U : C->operands()) {
126  const auto *OpC = dyn_cast<Constant>(U);
127  if (!OpC)
128  continue;
129 
130  if (!ConstantExprVisited.insert(OpC).second)
131  continue;
132 
133  Stack.push_back(OpC);
134  }
135  }
136 
137  return false;
138 }
139 
140 // We do not need to note the x workitem or workgroup id because they are always
141 // initialized.
142 //
143 // TODO: We should not add the attributes if the known compile time workgroup
144 // size is 1 for y/z.
146  bool &NonKernelOnly,
147  bool &IsQueuePtr) {
148  switch (ID) {
149  case Intrinsic::amdgcn_workitem_id_x:
150  NonKernelOnly = true;
151  return "amdgpu-work-item-id-x";
152  case Intrinsic::amdgcn_workgroup_id_x:
153  NonKernelOnly = true;
154  return "amdgpu-work-group-id-x";
155  case Intrinsic::amdgcn_workitem_id_y:
156  case Intrinsic::r600_read_tidig_y:
157  return "amdgpu-work-item-id-y";
158  case Intrinsic::amdgcn_workitem_id_z:
159  case Intrinsic::r600_read_tidig_z:
160  return "amdgpu-work-item-id-z";
161  case Intrinsic::amdgcn_workgroup_id_y:
162  case Intrinsic::r600_read_tgid_y:
163  return "amdgpu-work-group-id-y";
164  case Intrinsic::amdgcn_workgroup_id_z:
165  case Intrinsic::r600_read_tgid_z:
166  return "amdgpu-work-group-id-z";
167  case Intrinsic::amdgcn_dispatch_ptr:
168  return "amdgpu-dispatch-ptr";
169  case Intrinsic::amdgcn_dispatch_id:
170  return "amdgpu-dispatch-id";
171  case Intrinsic::amdgcn_kernarg_segment_ptr:
172  return "amdgpu-kernarg-segment-ptr";
173  case Intrinsic::amdgcn_implicitarg_ptr:
174  return "amdgpu-implicitarg-ptr";
175  case Intrinsic::amdgcn_queue_ptr:
176  case Intrinsic::trap:
177  case Intrinsic::debugtrap:
178  IsQueuePtr = true;
179  return "amdgpu-queue-ptr";
180  default:
181  return "";
182  }
183 }
184 
185 static bool handleAttr(Function &Parent, const Function &Callee,
186  StringRef Name) {
187  if (Callee.hasFnAttribute(Name)) {
188  Parent.addFnAttr(Name);
189  return true;
190  }
191  return false;
192 }
193 
194 static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
195  bool &NeedQueuePtr) {
196  // X ids unnecessarily propagated to kernels.
197  static const StringRef AttrNames[] = {
198  { "amdgpu-work-item-id-x" },
199  { "amdgpu-work-item-id-y" },
200  { "amdgpu-work-item-id-z" },
201  { "amdgpu-work-group-id-x" },
202  { "amdgpu-work-group-id-y" },
203  { "amdgpu-work-group-id-z" },
204  { "amdgpu-dispatch-ptr" },
205  { "amdgpu-dispatch-id" },
206  { "amdgpu-kernarg-segment-ptr" },
207  { "amdgpu-implicitarg-ptr" }
208  };
209 
210  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
211  NeedQueuePtr = true;
212 
213  for (StringRef AttrName : AttrNames)
214  handleAttr(Parent, Callee, AttrName);
215 }
216 
217 bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
218  bool Changed = false;
219 
220  for (auto *Node : reverse(NodeList)) {
221  Function *Caller = Node->getFunction();
222 
223  for (auto I : *Node) {
224  Function *Callee = std::get<1>(I)->getFunction();
225  if (Callee)
226  Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
227  }
228  }
229 
230  return Changed;
231 }
232 
233 bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
234  Function &Caller, Function &Callee) {
235 
236  // Check for externally defined function
237  if (!Callee.hasExactDefinition()) {
238  Callee.addFnAttr("uniform-work-group-size", "false");
239  if (!Caller.hasFnAttribute("uniform-work-group-size"))
240  Caller.addFnAttr("uniform-work-group-size", "false");
241 
242  return true;
243  }
244  // Check if the Caller has the attribute
245  if (Caller.hasFnAttribute("uniform-work-group-size")) {
246  // Check if the value of the attribute is true
247  if (Caller.getFnAttribute("uniform-work-group-size")
248  .getValueAsString().equals("true")) {
249  // Propagate the attribute to the Callee, if it does not have it
250  if (!Callee.hasFnAttribute("uniform-work-group-size")) {
251  Callee.addFnAttr("uniform-work-group-size", "true");
252  return true;
253  }
254  } else {
255  Callee.addFnAttr("uniform-work-group-size", "false");
256  return true;
257  }
258  } else {
259  // If the attribute is absent, set it as false
260  Caller.addFnAttr("uniform-work-group-size", "false");
261  Callee.addFnAttr("uniform-work-group-size", "false");
262  return true;
263  }
264  return false;
265 }
266 
267 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
268  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
269  bool HasFlat = ST.hasFlatAddressSpace();
270  bool HasApertureRegs = ST.hasApertureRegs();
271  SmallPtrSet<const Constant *, 8> ConstantExprVisited;
272 
273  bool Changed = false;
274  bool NeedQueuePtr = false;
275  bool HaveCall = false;
276  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
277 
278  for (BasicBlock &BB : F) {
279  for (Instruction &I : BB) {
280  CallSite CS(&I);
281  if (CS) {
282  Function *Callee = CS.getCalledFunction();
283 
284  // TODO: Do something with indirect calls.
285  if (!Callee) {
286  if (!CS.isInlineAsm())
287  HaveCall = true;
288  continue;
289  }
290 
291  Intrinsic::ID IID = Callee->getIntrinsicID();
292  if (IID == Intrinsic::not_intrinsic) {
293  HaveCall = true;
294  copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
295  Changed = true;
296  } else {
297  bool NonKernelOnly = false;
298  StringRef AttrName = intrinsicToAttrName(IID,
299  NonKernelOnly, NeedQueuePtr);
300  if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
301  F.addFnAttr(AttrName);
302  Changed = true;
303  }
304  }
305  }
306 
307  if (NeedQueuePtr || HasApertureRegs)
308  continue;
309 
310  if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
311  if (castRequiresQueuePtr(ASC)) {
312  NeedQueuePtr = true;
313  continue;
314  }
315  }
316 
317  for (const Use &U : I.operands()) {
318  const auto *OpC = dyn_cast<Constant>(U);
319  if (!OpC)
320  continue;
321 
322  if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
323  NeedQueuePtr = true;
324  break;
325  }
326  }
327  }
328  }
329 
330  if (NeedQueuePtr) {
331  F.addFnAttr("amdgpu-queue-ptr");
332  Changed = true;
333  }
334 
335  // TODO: We could refine this to captured pointers that could possibly be
336  // accessed by flat instructions. For now this is mostly a poor way of
337  // estimating whether there are calls before argument lowering.
338  if (HasFlat && !IsFunc && HaveCall) {
339  F.addFnAttr("amdgpu-flat-scratch");
340  Changed = true;
341  }
342 
343  return Changed;
344 }
345 
346 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
347  bool Changed = false;
348 
349  for (CallGraphNode *I : SCC) {
350  // Build a list of CallGraphNodes from most number of uses to least
351  if (I->getNumReferences())
352  NodeList.push_back(I);
353  else {
354  processUniformWorkGroupAttribute();
355  NodeList.clear();
356  }
357 
358  Function *F = I->getFunction();
359  // Add feature attributes
360  if (!F || F->isDeclaration())
361  continue;
362  Changed |= addFeatureAttributes(*F);
363  }
364 
365  return Changed;
366 }
367 
368 bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
369  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
370  if (!TPC)
371  report_fatal_error("TargetMachine is required");
372 
373  TM = &TPC->getTM<TargetMachine>();
374  return false;
375 }
376 
378  return new AMDGPUAnnotateKernelFeatures();
379 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:80
uint64_t CallInst * C
unsigned getSrcAddressSpace() const
Returns the address space of the pointer operand.
unsigned getOpcode() const
Return the opcode at the root of this constant expression.
Definition: Constants.h:1209
bool hasApertureRegs() const
AMDGPU specific subclass of TargetSubtarget.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Address space for local memory.
Definition: AMDGPU.h:274
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:580
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
Definition: CallSite.h:111
This defines the Use class.
A node in the call graph for a module.
Definition: CallGraph.h:164
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph...
This class represents a conversion between pointers from one address space to another.
bool isInlineAsm() const
Definition: CallSite.h:315
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
Pass * createAMDGPUAnnotateKernelFeaturesPass()
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC)
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:273
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:888
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:258
Value * getOperand(unsigned i) const
Definition: User.h:169
static void copyFeaturesToFunction(Function &Parent, const Function &Callee, bool &NeedQueuePtr)
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important base class in LLVM.
Definition: Constant.h:41
This file contains the declarations for the subclasses of Constant, which represent the different fla...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool isEntryFunctionCC(CallingConv::ID CC)
char & AMDGPUAnnotateKernelFeaturesID
Represent the analysis usage information of a pass.
op_range operands()
Definition: User.h:237
Address space for private memory.
Definition: AMDGPU.h:275
bool hasFlatAddressSpace() const
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, "Add AMDGPU function attributes", false, false) static bool castRequiresQueuePtr(unsigned SrcAS)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
amdgpu Simplify well known AMD library false FunctionCallee Callee
void setPreservesAll()
Set by analyses that do not transform their input at all.
static bool handleAttr(Function &Parent, const Function &Callee, StringRef Name)
SmallVector< NodeAddr< NodeBase * >, 4 > NodeList
Definition: RDFGraph.h:512
LLVM_NODISCARD bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:160
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:223
bool hasExactDefinition() const
Return true if this global has an exact definition.
Definition: GlobalValue.h:416
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
#define DEBUG_TYPE
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:231
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:229
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr)