LLVM  9.0.0svn
AMDGPUAnnotateKernelFeatures.cpp
Go to the documentation of this file.
1 //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass adds target attributes to functions which use intrinsics
10 /// which will impact calling convention lowering.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
38 
39 #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
40 
41 using namespace llvm;
42 
43 namespace {
44 
45 class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
46 private:
47  const TargetMachine *TM = nullptr;
48 
49  bool addFeatureAttributes(Function &F);
50 
51 public:
52  static char ID;
53 
54  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
55 
56  bool doInitialization(CallGraph &CG) override;
57  bool runOnSCC(CallGraphSCC &SCC) override;
58 
59  StringRef getPassName() const override {
60  return "AMDGPU Annotate Kernel Features";
61  }
62 
63  void getAnalysisUsage(AnalysisUsage &AU) const override {
64  AU.setPreservesAll();
66  }
67 
68  static bool visitConstantExpr(const ConstantExpr *CE);
69  static bool visitConstantExprsRecursively(
70  const Constant *EntryC,
71  SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
72 };
73 
74 } // end anonymous namespace
75 
77 
79 
80 INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
81  "Add AMDGPU function attributes", false, false)
82 
83 
84 // The queue ptr is only needed when casting to flat, not from it.
85 static bool castRequiresQueuePtr(unsigned SrcAS) {
86  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
87 }
88 
89 static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
91 }
92 
93 bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
94  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
95  unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
96  return castRequiresQueuePtr(SrcAS);
97  }
98 
99  return false;
100 }
101 
102 bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
103  const Constant *EntryC,
104  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
105 
106  if (!ConstantExprVisited.insert(EntryC).second)
107  return false;
108 
110  Stack.push_back(EntryC);
111 
112  while (!Stack.empty()) {
113  const Constant *C = Stack.pop_back_val();
114 
115  // Check this constant expression.
116  if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
117  if (visitConstantExpr(CE))
118  return true;
119  }
120 
121  // Visit all sub-expressions.
122  for (const Use &U : C->operands()) {
123  const auto *OpC = dyn_cast<Constant>(U);
124  if (!OpC)
125  continue;
126 
127  if (!ConstantExprVisited.insert(OpC).second)
128  continue;
129 
130  Stack.push_back(OpC);
131  }
132  }
133 
134  return false;
135 }
136 
137 // We do not need to note the x workitem or workgroup id because they are always
138 // initialized.
139 //
140 // TODO: We should not add the attributes if the known compile time workgroup
141 // size is 1 for y/z.
143  bool &NonKernelOnly,
144  bool &IsQueuePtr) {
145  switch (ID) {
146  case Intrinsic::amdgcn_workitem_id_x:
147  NonKernelOnly = true;
148  return "amdgpu-work-item-id-x";
149  case Intrinsic::amdgcn_workgroup_id_x:
150  NonKernelOnly = true;
151  return "amdgpu-work-group-id-x";
152  case Intrinsic::amdgcn_workitem_id_y:
153  case Intrinsic::r600_read_tidig_y:
154  return "amdgpu-work-item-id-y";
155  case Intrinsic::amdgcn_workitem_id_z:
156  case Intrinsic::r600_read_tidig_z:
157  return "amdgpu-work-item-id-z";
158  case Intrinsic::amdgcn_workgroup_id_y:
159  case Intrinsic::r600_read_tgid_y:
160  return "amdgpu-work-group-id-y";
161  case Intrinsic::amdgcn_workgroup_id_z:
162  case Intrinsic::r600_read_tgid_z:
163  return "amdgpu-work-group-id-z";
164  case Intrinsic::amdgcn_dispatch_ptr:
165  return "amdgpu-dispatch-ptr";
166  case Intrinsic::amdgcn_dispatch_id:
167  return "amdgpu-dispatch-id";
168  case Intrinsic::amdgcn_kernarg_segment_ptr:
169  return "amdgpu-kernarg-segment-ptr";
170  case Intrinsic::amdgcn_implicitarg_ptr:
171  return "amdgpu-implicitarg-ptr";
172  case Intrinsic::amdgcn_queue_ptr:
173  case Intrinsic::trap:
174  case Intrinsic::debugtrap:
175  IsQueuePtr = true;
176  return "amdgpu-queue-ptr";
177  default:
178  return "";
179  }
180 }
181 
182 static bool handleAttr(Function &Parent, const Function &Callee,
183  StringRef Name) {
184  if (Callee.hasFnAttribute(Name)) {
185  Parent.addFnAttr(Name);
186  return true;
187  }
188 
189  return false;
190 }
191 
192 static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
193  bool &NeedQueuePtr) {
194  // X ids unnecessarily propagated to kernels.
195  static const StringRef AttrNames[] = {
196  { "amdgpu-work-item-id-x" },
197  { "amdgpu-work-item-id-y" },
198  { "amdgpu-work-item-id-z" },
199  { "amdgpu-work-group-id-x" },
200  { "amdgpu-work-group-id-y" },
201  { "amdgpu-work-group-id-z" },
202  { "amdgpu-dispatch-ptr" },
203  { "amdgpu-dispatch-id" },
204  { "amdgpu-kernarg-segment-ptr" },
205  { "amdgpu-implicitarg-ptr" }
206  };
207 
208  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
209  NeedQueuePtr = true;
210 
211  for (StringRef AttrName : AttrNames)
212  handleAttr(Parent, Callee, AttrName);
213 }
214 
215 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
216  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
217  bool HasFlat = ST.hasFlatAddressSpace();
218  bool HasApertureRegs = ST.hasApertureRegs();
219  SmallPtrSet<const Constant *, 8> ConstantExprVisited;
220 
221  bool Changed = false;
222  bool NeedQueuePtr = false;
223  bool HaveCall = false;
224  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
225 
226  for (BasicBlock &BB : F) {
227  for (Instruction &I : BB) {
228  CallSite CS(&I);
229  if (CS) {
231 
232  // TODO: Do something with indirect calls.
233  if (!Callee) {
234  if (!CS.isInlineAsm())
235  HaveCall = true;
236  continue;
237  }
238 
239  Intrinsic::ID IID = Callee->getIntrinsicID();
240  if (IID == Intrinsic::not_intrinsic) {
241  HaveCall = true;
242  copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
243  Changed = true;
244  } else {
245  bool NonKernelOnly = false;
246  StringRef AttrName = intrinsicToAttrName(IID,
247  NonKernelOnly, NeedQueuePtr);
248  if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
249  F.addFnAttr(AttrName);
250  Changed = true;
251  }
252  }
253  }
254 
255  if (NeedQueuePtr || HasApertureRegs)
256  continue;
257 
258  if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
259  if (castRequiresQueuePtr(ASC)) {
260  NeedQueuePtr = true;
261  continue;
262  }
263  }
264 
265  for (const Use &U : I.operands()) {
266  const auto *OpC = dyn_cast<Constant>(U);
267  if (!OpC)
268  continue;
269 
270  if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
271  NeedQueuePtr = true;
272  break;
273  }
274  }
275  }
276  }
277 
278  if (NeedQueuePtr) {
279  F.addFnAttr("amdgpu-queue-ptr");
280  Changed = true;
281  }
282 
283  // TODO: We could refine this to captured pointers that could possibly be
284  // accessed by flat instructions. For now this is mostly a poor way of
285  // estimating whether there are calls before argument lowering.
286  if (HasFlat && !IsFunc && HaveCall) {
287  F.addFnAttr("amdgpu-flat-scratch");
288  Changed = true;
289  }
290 
291  return Changed;
292 }
293 
294 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
295  Module &M = SCC.getCallGraph().getModule();
296  Triple TT(M.getTargetTriple());
297 
298  bool Changed = false;
299  for (CallGraphNode *I : SCC) {
300  Function *F = I->getFunction();
301  if (!F || F->isDeclaration())
302  continue;
303 
304  Changed |= addFeatureAttributes(*F);
305  }
306 
307  return Changed;
308 }
309 
310 bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
311  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
312  if (!TPC)
313  report_fatal_error("TargetMachine is required");
314 
315  TM = &TPC->getTM<TargetMachine>();
316  return false;
317 }
318 
320  return new AMDGPUAnnotateKernelFeatures();
321 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:80
uint64_t CallInst * C
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
Definition: Module.h:239
unsigned getSrcAddressSpace() const
Returns the address space of the pointer operand.
unsigned getOpcode() const
Return the opcode at the root of this constant expression.
Definition: Constants.h:1209
bool hasApertureRegs() const
AMDGPU specific subclass of TargetSubtarget.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:64
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:320
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:534
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
Definition: CallSite.h:111
This defines the Use class.
A node in the call graph for a module.
Definition: CallGraph.h:164
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph...
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:113
This class represents a conversion between pointers from one address space to another.
bool isInlineAsm() const
Definition: CallSite.h:315
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
Pass * createAMDGPUAnnotateKernelFeaturesPass()
const CallGraph & getCallGraph()
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC)
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:888
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
Value * getOperand(unsigned i) const
Definition: User.h:169
static void copyFeaturesToFunction(Function &Parent, const Function &Callee, bool &NeedQueuePtr)
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important base class in LLVM.
Definition: Constant.h:41
This file contains the declarations for the subclasses of Constant, which represent the different fla...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool isEntryFunctionCC(CallingConv::ID CC)
char & AMDGPUAnnotateKernelFeaturesID
Represent the analysis usage information of a pass.
op_range operands()
Definition: User.h:237
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
bool hasFlatAddressSpace() const
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, "Add AMDGPU function attributes", false, false) static bool castRequiresQueuePtr(unsigned SrcAS)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:839
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:373
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
amdgpu Simplify well known AMD library false FunctionCallee Callee
Address space for local memory.
Definition: AMDGPU.h:259
void setPreservesAll()
Set by analyses that do not transform their input at all.
static bool handleAttr(Function &Parent, const Function &Callee, StringRef Name)
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
#define DEBUG_TYPE
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:205
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:58
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:229
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
Address space for private memory.
Definition: AMDGPU.h:260
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr)