#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

// Excerpt of the pass interface; unrelated members are elided.
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
  bool processUniformWorkGroupAttribute();
  bool doInitialization(CallGraph &CG) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  static bool visitConstantExprsRecursively(
      const Constant *EntryC,
      SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
      bool HasApertureRegs);
};

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)
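// Hedged sketch of the castRequiresQueuePtr helper used below; only its
// signatures appear in this excerpt, and the address-space test shown here
// is an assumption consistent with its use sites (casts from local or
// private to flat need the queue pointer to resolve the apertures):
//   static bool castRequiresQueuePtr(unsigned SrcAS) {
//     return SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
//            SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
//   }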
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS);
  }

  return false;
}
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
    bool HasApertureRegs) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (!HasApertureRegs && visitConstantExpr(CE))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;
      if (!ConstantExprVisited.insert(OpC).second)
        continue;
      Stack.push_back(OpC);
    }
  }

  return false;
}
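// Worked example (comment sketch, not part of the pass): a use of an LDS
// global through a constant-expression addrspacecast, e.g.
//   @lds = internal addrspace(3) global i32 undef
//   store i32 0, i32* addrspacecast (i32 addrspace(3)* @lds to i32*)
// reaches the walk above as a ConstantExpr; on subtargets without aperture
// registers the local-to-flat cast makes visitConstantExpr return true, and
// the caller records that the queue pointer is needed.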
// Returns the name of the attribute implied by the given intrinsic, or an
// empty string if none applies.
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}
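// Illustrative call (comment sketch; variable names are hypothetical):
//   bool NonKernelOnly = false, NeedQueuePtr = false;
//   StringRef Attr = intrinsicToAttrName(Intrinsic::amdgcn_workgroup_id_y,
//                                        NonKernelOnly, NeedQueuePtr);
//   // Attr == "amdgpu-work-group-id-y"; both flags stay false, so the
//   // attribute is added to kernels and non-kernel functions alike.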
static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
                                   bool &NeedQueuePtr) {
  // X ids unnecessarily propagated to kernels.
  static constexpr StringLiteral AttrNames[] = {
      "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
      "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
      "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
      "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
      "amdgpu-implicitarg-ptr"};

  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
    NeedQueuePtr = true;

  for (StringRef AttrName : AttrNames)
    handleAttr(Parent, Callee, AttrName);
}
bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
  bool Changed = false;

  for (auto *Node : reverse(NodeList)) {
    Function *Caller = Node->getFunction();
    for (auto I : *Node) {
      Function *Callee = std::get<1>(I)->getFunction();
      if (Callee)
        Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
    }
  }
  return Changed;
}
bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
    Function &Caller, Function &Callee) {

  // Check for an externally defined function.
  if (!Callee.hasExactDefinition()) {
    Callee.addFnAttr("uniform-work-group-size", "false");
    if (!Caller.hasFnAttribute("uniform-work-group-size"))
      Caller.addFnAttr("uniform-work-group-size", "false");
    return true;
  }

  // Check if the caller has the attribute.
  if (Caller.hasFnAttribute("uniform-work-group-size")) {
    // Check if the value of the attribute is true.
    if (Caller.getFnAttribute("uniform-work-group-size")
            .getValueAsString().equals("true")) {
      // Propagate the attribute to the callee if it does not have it.
      if (!Callee.hasFnAttribute("uniform-work-group-size")) {
        Callee.addFnAttr("uniform-work-group-size", "true");
        return true;
      }
    } else {
      Callee.addFnAttr("uniform-work-group-size", "false");
      return true;
    }
  } else {
    // If the attribute is absent, set it as false on both ends.
    Caller.addFnAttr("uniform-work-group-size", "false");
    Callee.addFnAttr("uniform-work-group-size", "false");
    return true;
  }
  return false;
}
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
  bool HasApertureRegs = ST.hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool HaveStackObjects = false;
  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (isa<AllocaInst>(I)) {
        HaveStackObjects = true;
        continue;
      }

      if (auto *CB = dyn_cast<CallBase>(&I)) {
        const Function *Callee =
            dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());

        // Note calls to unknown callees, except for inline asm.
        if (!Callee) {
          if (!CB->isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          bool NonKernelOnly = false;
          if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
            F.addFnAttr("amdgpu-kernarg-segment-ptr");
          } else {
            StringRef AttrName =
                intrinsicToAttrName(IID, NonKernelOnly, NeedQueuePtr);
            if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
              F.addFnAttr(AttrName);
              Changed = true;
            }
          }
        }
      }

      if (NeedQueuePtr || (!IsFunc && HasApertureRegs))
        continue;

      if (const auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (!HasApertureRegs && castRequiresQueuePtr(ASC)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;
        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc,
                                          HasApertureRegs)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }
  if (!IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-calls");
    Changed = true;
  }
  if (HaveStackObjects) {
    F.addFnAttr("amdgpu-stack-objects");
    Changed = true;
  }
  return Changed;
}
bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
  bool Changed = false;

  for (CallGraphNode *I : SCC) {
    // Accumulate nodes that still have references; once a node with no
    // references is reached, process the list and start over.
    if (I->getNumReferences())
      NodeList.push_back(I);
    else {
      processUniformWorkGroupAttribute();
      NodeList.clear();
    }

    Function *F = I->getFunction();
    // Add feature attributes.
    if (!F || F->isDeclaration())
      continue;
    Changed |= addFeatureAttributes(*F);
  }

  return Changed;
}
bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    report_fatal_error("TargetMachine is required");
  TM = &TPC->getTM<TargetMachine>();
  return false;
}

Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}
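// Hedged usage sketch (not part of this file): scheduling the pass with the
// legacy pass manager; the module M and its AMDGPU target configuration are
// assumed to exist.
//   llvm::legacy::PassManager PM;
//   PM.add(createAMDGPUAnnotateKernelFeaturesPass());
//   PM.run(M);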