LLVM  6.0.0svn
AMDGPUAnnotateKernelFeatures.cpp
Go to the documentation of this file.
1 //===- AMDGPUAnnotateKernelFeatures.cpp -----------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file This pass adds target attributes to functions which use intrinsics
11 /// which will impact calling convention lowering.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "AMDGPUSubtarget.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/Triple.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/Constant.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Use.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/Casting.h"
39 
40 #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
41 
42 using namespace llvm;
43 
44 namespace {
45 
46 class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
47 private:
48  const TargetMachine *TM = nullptr;
49  AMDGPUAS AS;
50 
51  bool addFeatureAttributes(Function &F);
52 
53 public:
54  static char ID;
55 
56  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
57 
58  bool doInitialization(CallGraph &CG) override;
59  bool runOnSCC(CallGraphSCC &SCC) override;
60 
61  StringRef getPassName() const override {
62  return "AMDGPU Annotate Kernel Features";
63  }
64 
65  void getAnalysisUsage(AnalysisUsage &AU) const override {
66  AU.setPreservesAll();
68  }
69 
70  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
71  static bool visitConstantExprsRecursively(
72  const Constant *EntryC,
73  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
74  AMDGPUAS AS);
75 };
76 
77 } // end anonymous namespace
78 
80 
82 
83 INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
84  "Add AMDGPU function attributes", false, false)
85 
86 
87 // The queue ptr is only needed when casting to flat, not from it.
88 static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
89  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
90 }
91 
92 static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
93  const AMDGPUAS &AS) {
95 }
96 
97 bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
98  AMDGPUAS AS) {
99  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
100  unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
101  return castRequiresQueuePtr(SrcAS, AS);
102  }
103 
104  return false;
105 }
106 
107 bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
108  const Constant *EntryC,
109  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
110  AMDGPUAS AS) {
111 
112  if (!ConstantExprVisited.insert(EntryC).second)
113  return false;
114 
116  Stack.push_back(EntryC);
117 
118  while (!Stack.empty()) {
119  const Constant *C = Stack.pop_back_val();
120 
121  // Check this constant expression.
122  if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
123  if (visitConstantExpr(CE, AS))
124  return true;
125  }
126 
127  // Visit all sub-expressions.
128  for (const Use &U : C->operands()) {
129  const auto *OpC = dyn_cast<Constant>(U);
130  if (!OpC)
131  continue;
132 
133  if (!ConstantExprVisited.insert(OpC).second)
134  continue;
135 
136  Stack.push_back(OpC);
137  }
138  }
139 
140  return false;
141 }
142 
143 // We do not need to note the x workitem or workgroup id because they are always
144 // initialized.
145 //
146 // TODO: We should not add the attributes if the known compile time workgroup
147 // size is 1 for y/z.
149  bool &NonKernelOnly,
150  bool &IsQueuePtr) {
151  switch (ID) {
152  case Intrinsic::amdgcn_workitem_id_x:
153  NonKernelOnly = true;
154  return "amdgpu-work-item-id-x";
155  case Intrinsic::amdgcn_workgroup_id_x:
156  NonKernelOnly = true;
157  return "amdgpu-work-group-id-x";
158  case Intrinsic::amdgcn_workitem_id_y:
159  case Intrinsic::r600_read_tidig_y:
160  return "amdgpu-work-item-id-y";
161  case Intrinsic::amdgcn_workitem_id_z:
162  case Intrinsic::r600_read_tidig_z:
163  return "amdgpu-work-item-id-z";
164  case Intrinsic::amdgcn_workgroup_id_y:
165  case Intrinsic::r600_read_tgid_y:
166  return "amdgpu-work-group-id-y";
167  case Intrinsic::amdgcn_workgroup_id_z:
168  case Intrinsic::r600_read_tgid_z:
169  return "amdgpu-work-group-id-z";
170  case Intrinsic::amdgcn_dispatch_ptr:
171  return "amdgpu-dispatch-ptr";
172  case Intrinsic::amdgcn_dispatch_id:
173  return "amdgpu-dispatch-id";
174  case Intrinsic::amdgcn_kernarg_segment_ptr:
175  return "amdgpu-kernarg-segment-ptr";
176  case Intrinsic::amdgcn_implicitarg_ptr:
177  return "amdgpu-implicitarg-ptr";
178  case Intrinsic::amdgcn_queue_ptr:
179  case Intrinsic::trap:
180  case Intrinsic::debugtrap:
181  IsQueuePtr = true;
182  return "amdgpu-queue-ptr";
183  default:
184  return "";
185  }
186 }
187 
188 static bool handleAttr(Function &Parent, const Function &Callee,
189  StringRef Name) {
190  if (Callee.hasFnAttribute(Name)) {
191  Parent.addFnAttr(Name);
192  return true;
193  }
194 
195  return false;
196 }
197 
198 static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
199  bool &NeedQueuePtr) {
200  // X ids unnecessarily propagated to kernels.
201  static const StringRef AttrNames[] = {
202  { "amdgpu-work-item-id-x" },
203  { "amdgpu-work-item-id-y" },
204  { "amdgpu-work-item-id-z" },
205  { "amdgpu-work-group-id-x" },
206  { "amdgpu-work-group-id-y" },
207  { "amdgpu-work-group-id-z" },
208  { "amdgpu-dispatch-ptr" },
209  { "amdgpu-dispatch-id" },
210  { "amdgpu-kernarg-segment-ptr" },
211  { "amdgpu-implicitarg-ptr" }
212  };
213 
214  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
215  NeedQueuePtr = true;
216 
217  for (StringRef AttrName : AttrNames)
218  handleAttr(Parent, Callee, AttrName);
219 }
220 
221 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
222  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
223  bool HasFlat = ST.hasFlatAddressSpace();
224  bool HasApertureRegs = ST.hasApertureRegs();
225  SmallPtrSet<const Constant *, 8> ConstantExprVisited;
226 
227  bool Changed = false;
228  bool NeedQueuePtr = false;
229  bool HaveCall = false;
230  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
231 
232  for (BasicBlock &BB : F) {
233  for (Instruction &I : BB) {
234  CallSite CS(&I);
235  if (CS) {
237 
238  // TODO: Do something with indirect calls.
239  if (!Callee) {
240  if (!CS.isInlineAsm())
241  HaveCall = true;
242  continue;
243  }
244 
245  Intrinsic::ID IID = Callee->getIntrinsicID();
246  if (IID == Intrinsic::not_intrinsic) {
247  HaveCall = true;
248  copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
249  Changed = true;
250  } else {
251  bool NonKernelOnly = false;
252  StringRef AttrName = intrinsicToAttrName(IID,
253  NonKernelOnly, NeedQueuePtr);
254  if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
255  F.addFnAttr(AttrName);
256  Changed = true;
257  }
258  }
259  }
260 
261  if (NeedQueuePtr || HasApertureRegs)
262  continue;
263 
264  if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
265  if (castRequiresQueuePtr(ASC, AS)) {
266  NeedQueuePtr = true;
267  continue;
268  }
269  }
270 
271  for (const Use &U : I.operands()) {
272  const auto *OpC = dyn_cast<Constant>(U);
273  if (!OpC)
274  continue;
275 
276  if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
277  NeedQueuePtr = true;
278  break;
279  }
280  }
281  }
282  }
283 
284  if (NeedQueuePtr) {
285  F.addFnAttr("amdgpu-queue-ptr");
286  Changed = true;
287  }
288 
289  // TODO: We could refine this to captured pointers that could possibly be
290  // accessed by flat instructions. For now this is mostly a poor way of
291  // estimating whether there are calls before argument lowering.
292  if (HasFlat && !IsFunc && HaveCall) {
293  F.addFnAttr("amdgpu-flat-scratch");
294  Changed = true;
295  }
296 
297  return Changed;
298 }
299 
300 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
301  Module &M = SCC.getCallGraph().getModule();
302  Triple TT(M.getTargetTriple());
303 
304  bool Changed = false;
305  for (CallGraphNode *I : SCC) {
306  Function *F = I->getFunction();
307  if (!F || F->isDeclaration())
308  continue;
309 
310  Changed |= addFeatureAttributes(*F);
311  }
312 
313  return Changed;
314 }
315 
316 bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
317  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
318  if (!TPC)
319  report_fatal_error("TargetMachine is required");
320 
321  AS = AMDGPU::getAMDGPUAS(CG.getModule());
322  TM = &TPC->getTM<TargetMachine>();
323  return false;
324 }
325 
327  return new AMDGPUAnnotateKernelFeatures();
328 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
uint64_t CallInst * C
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
Definition: Module.h:233
unsigned getSrcAddressSpace() const
Returns the address space of the pointer operand.
unsigned getOpcode() const
Return the opcode at the root of this constant expression.
Definition: Constants.h:1171
bool isInlineAsm() const
Definition: CallSite.h:305
AMDGPU specific subclass of TargetSubtarget.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:115
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
AMDGPUAS getAMDGPUAS(const Module &M)
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:262
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
This defines the Use class.
A node in the call graph for a module.
Definition: CallGraph.h:165
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph...
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:114
This class represents a conversion between pointers from one address space to another.
OpenCL uses address spaces to differentiate between various memory regions on the hardware...
Definition: AMDGPU.h:214
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
Pass * createAMDGPUAnnotateKernelFeaturesPass()
const CallGraph & getCallGraph()
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:862
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC, const AMDGPUAS &AS)
amdgpu Simplify well known AMD library false Value * Callee
Value * getOperand(unsigned i) const
Definition: User.h:154
static void copyFeaturesToFunction(Function &Parent, const Function &Callee, bool &NeedQueuePtr)
bool hasApertureRegs() const
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool isEntryFunctionCC(CallingConv::ID CC)
char & AMDGPUAnnotateKernelFeaturesID
Represent the analysis usage information of a pass.
op_range operands()
Definition: User.h:222
const AMDGPUAS & AS
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, "Add AMDGPU function attributes", false, false) static bool castRequiresQueuePtr(unsigned SrcAS
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:194
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Module.h This file contains the declarations for the Module class.
bool hasFlatAddressSpace() const
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:175
void setPreservesAll()
Set by analyses that do not transform their input at all.
static bool handleAttr(Function &Parent, const Function &Callee, StringRef Name)
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:74
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
#define DEBUG_TYPE
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:201
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
Definition: CallSite.h:107
aarch64 promote const
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:57
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:211
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr)