39 #define DEBUG_TYPE "amdgpu-annotate-kernel-features" 51 bool processUniformWorkGroupAttribute();
59 bool doInitialization(
CallGraph &CG)
override;
63 return "AMDGPU Annotate Kernel Features";
72 static bool visitConstantExprsRecursively(
84 "Add AMDGPU function attributes",
false,
false)
96 bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(
const ConstantExpr *CE) {
97 if (CE->
getOpcode() == Instruction::AddrSpaceCast) {
105 bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
109 if (!ConstantExprVisited.
insert(EntryC).second)
115 while (!Stack.
empty()) {
119 if (
const auto *CE = dyn_cast<ConstantExpr>(C)) {
120 if (visitConstantExpr(CE))
130 if (!ConstantExprVisited.
insert(OpC).second)
149 case Intrinsic::amdgcn_workitem_id_x:
150 NonKernelOnly =
true;
151 return "amdgpu-work-item-id-x";
152 case Intrinsic::amdgcn_workgroup_id_x:
153 NonKernelOnly =
true;
154 return "amdgpu-work-group-id-x";
155 case Intrinsic::amdgcn_workitem_id_y:
156 case Intrinsic::r600_read_tidig_y:
157 return "amdgpu-work-item-id-y";
158 case Intrinsic::amdgcn_workitem_id_z:
159 case Intrinsic::r600_read_tidig_z:
160 return "amdgpu-work-item-id-z";
161 case Intrinsic::amdgcn_workgroup_id_y:
162 case Intrinsic::r600_read_tgid_y:
163 return "amdgpu-work-group-id-y";
164 case Intrinsic::amdgcn_workgroup_id_z:
165 case Intrinsic::r600_read_tgid_z:
166 return "amdgpu-work-group-id-z";
167 case Intrinsic::amdgcn_dispatch_ptr:
168 return "amdgpu-dispatch-ptr";
169 case Intrinsic::amdgcn_dispatch_id:
170 return "amdgpu-dispatch-id";
171 case Intrinsic::amdgcn_kernarg_segment_ptr:
172 return "amdgpu-kernarg-segment-ptr";
173 case Intrinsic::amdgcn_implicitarg_ptr:
174 return "amdgpu-implicitarg-ptr";
175 case Intrinsic::amdgcn_queue_ptr:
176 case Intrinsic::amdgcn_is_shared:
177 case Intrinsic::amdgcn_is_private:
179 case Intrinsic::trap:
180 case Intrinsic::debugtrap:
182 return "amdgpu-queue-ptr";
198 bool &NeedQueuePtr) {
201 "amdgpu-work-item-id-x",
"amdgpu-work-item-id-y",
202 "amdgpu-work-item-id-z",
"amdgpu-work-group-id-x",
203 "amdgpu-work-group-id-y",
"amdgpu-work-group-id-z",
204 "amdgpu-dispatch-ptr",
"amdgpu-dispatch-id",
205 "amdgpu-kernarg-segment-ptr",
"amdgpu-implicitarg-ptr"};
207 if (
handleAttr(Parent, Callee,
"amdgpu-queue-ptr"))
214 bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
215 bool Changed =
false;
218 Function *Caller = Node->getFunction();
220 for (
auto I : *Node) {
223 Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
230 bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
235 Callee.
addFnAttr(
"uniform-work-group-size",
"false");
237 Caller.
addFnAttr(
"uniform-work-group-size",
"false");
248 Callee.
addFnAttr(
"uniform-work-group-size",
"true");
252 Callee.
addFnAttr(
"uniform-work-group-size",
"false");
257 Caller.
addFnAttr(
"uniform-work-group-size",
"false");
258 Callee.
addFnAttr(
"uniform-work-group-size",
"false");
264 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(
Function &
F) {
270 bool Changed =
false;
271 bool NeedQueuePtr =
false;
272 bool HaveCall =
false;
294 bool NonKernelOnly =
false;
296 NonKernelOnly, NeedQueuePtr);
297 if (!AttrName.
empty() && (IsFunc || !NonKernelOnly)) {
298 F.addFnAttr(AttrName);
304 if (NeedQueuePtr || HasApertureRegs)
314 for (
const Use &U :
I.operands()) {
319 if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
328 F.addFnAttr(
"amdgpu-queue-ptr");
335 if (HasFlat && !IsFunc && HaveCall) {
336 F.addFnAttr(
"amdgpu-flat-scratch");
343 bool AMDGPUAnnotateKernelFeatures::runOnSCC(
CallGraphSCC &SCC) {
344 bool Changed =
false;
348 if (
I->getNumReferences())
351 processUniformWorkGroupAttribute();
359 Changed |= addFeatureAttributes(*F);
365 bool AMDGPUAnnotateKernelFeatures::doInitialization(
CallGraph &CG) {
366 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
375 return new AMDGPUAnnotateKernelFeatures();
Pass interface - Implemented by all 'passes'.
unsigned getSrcAddressSpace() const
Returns the address space of the pointer operand.
unsigned getOpcode() const
Return the opcode at the root of this constant expression.
bool hasApertureRegs() const
AMDGPU specific subclass of TargetSubtarget.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
This class represents lattice values for constants.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
void push_back(const T &Elt)
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
This defines the Use class.
A node in the call graph for a module.
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph...
This class represents a conversion between pointers from one address space to another.
A Use represents the edge between a Value definition and its users.
Pass * createAMDGPUAnnotateKernelFeaturesPass()
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC)
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
A constant value that is initialized with an expression using other constant values.
Type * getType() const
All values are typed, get the type of this value.
static Function * getFunction(Constant *C)
Value * getOperand(unsigned i) const
Address space for private memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
static void copyFeaturesToFunction(Function &Parent, const Function &Callee, bool &NeedQueuePtr)
LLVM Basic Block Representation.
Address space for local memory.
This is an important base class in LLVM.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool isEntryFunctionCC(CallingConv::ID CC)
char & AMDGPUAnnotateKernelFeaturesID
Represent the analysis usage information of a pass.
bool hasFlatAddressSpace() const
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, "Add AMDGPU function attributes", false, false) static bool castRequiresQueuePtr(unsigned SrcAS)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements...
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
amdgpu Simplify well known AMD library false FunctionCallee Callee
void setPreservesAll()
Set by analyses that do not transform their input at all.
static bool handleAttr(Function &Parent, const Function &Callee, StringRef Name)
SmallVector< NodeAddr< NodeBase * >, 4 > NodeList
LLVM_NODISCARD bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
The basic data container for the call graph of a Module of IR.
LLVM_NODISCARD bool empty() const
StringRef getValueAsString() const
Return the attribute's value as a string.
bool hasExactDefinition() const
Return true if this global has an exact definition.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Primary interface to the complete machine description for the target machine.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
StringRef - Represent a constant reference to a string, i.e.
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr)