LLVM 23.0.0git
KernelInfo.cpp
Go to the documentation of this file.
1//===- KernelInfo.cpp - Kernel Analysis -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the KernelInfoPrinter class used to emit remarks about
10// function properties from a GPU kernel.
11//
12//===----------------------------------------------------------------------===//
13
19#include "llvm/IR/DebugInfo.h"
20#include "llvm/IR/Dominators.h"
22#include "llvm/IR/Metadata.h"
23#include "llvm/IR/Module.h"
24#include "llvm/IR/PassManager.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "kernel-info"
29
30namespace {
31
32/// Data structure holding function info for kernels.
33class KernelInfo {
  // Private helper: walks the instructions of one basic block, bumping the
  // counters declared below and emitting remarks through ORE for the allocas
  // and calls it counts.
34 void updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE);
35
36public:
  // Static entry point: builds a local KernelInfo for F, fills it in (function
  // attributes, then one updateForBB call per basic block), and finally emits
  // one remark per recorded property.
37 static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
38 TargetMachine *TM);
39
40 /// Whether the function has external linkage and is not a kernel function.
41 bool ExternalNotKernel = false;
42
43 /// Launch bounds.
  // NOTE(review): the member declaration that belongs to the comment above is
  // not visible in this listing. emitKernelInfo refers to it as `LaunchBounds`,
  // push_backs {name, value} pairs into it and reads them back via
  // .first/.second -- confirm the exact type against the real source.
45
46 /// The number of alloca instructions inside the function, the number of those
47 /// with allocation sizes that cannot be determined at compile time, and the
48 /// sum of the sizes that can be.
49 ///
50 /// With the current implementation for at least some GPU archs,
51 /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in
52 /// case the implementation changes.
53 int64_t Allocas = 0;
54 int64_t AllocasDyn = 0;
55 int64_t AllocasStaticSizeSum = 0;
56
57 /// Number of direct/indirect calls (anything derived from CallBase).
58 int64_t DirectCalls = 0;
59 int64_t IndirectCalls = 0;
60
61 /// Number of direct calls made from this function to other functions
62 /// defined in this module.
63 int64_t DirectCallsToDefinedFunctions = 0;
64
65 /// Number of direct calls to inline assembly.
66 int64_t InlineAssemblyCalls = 0;
67
68 /// Number of calls of type InvokeInst.
69 int64_t Invokes = 0;
70
71 /// Target-specific flat address space.
  // Unlike the counters, this member has no in-class initializer;
  // emitKernelInfo assigns it from TargetTransformInfo::getFlatAddressSpace()
  // before any basic block is scanned.
72 unsigned FlatAddrspace;
73
74 /// Number of flat address space memory accesses (via load, store, etc.).
75 int64_t FlatAddrspaceAccesses = 0;
76};
77
78} // end anonymous namespace
79
80static void identifyCallee(OptimizationRemark &R, const Module *M,
81 const Value *V, StringRef Kind = "") {
82 SmallString<100> Name; // might be function name or asm expression
83 if (const Function *F = dyn_cast<Function>(V)) {
84 if (auto *SubProgram = F->getSubprogram()) {
85 if (SubProgram->isArtificial())
86 R << "artificial ";
87 Name = SubProgram->getName();
88 }
89 }
90 if (Name.empty()) {
91 raw_svector_ostream OS(Name);
92 V->printAsOperand(OS, /*PrintType=*/false, M);
93 }
94 if (!Kind.empty())
95 R << Kind << " ";
96 R << "'" << Name << "'";
97}
98
100 identifyCallee(R, F.getParent(), &F, "function");
101}
102
// Emits one optimization remark describing a single alloca in Caller.
//
// The remark text is assembled in this order: "in " + the caller (via
// identifyFunction), an optional "artificial " marker, the alloca's operand
// name, the debug variable name (or "without debug info"), and finally either
// "static size of N bytes" or "dynamic size".
//
// StaticSize is the size in bytes; the caller passes 0 when the allocation
// size is not known at compile time.
// NOTE(review): because the size branch tests `if (StaticSize)`, an alloca
// whose known static size is 0 would also be reported as "dynamic size" --
// confirm whether that is intended.
// NOTE(review): gutter lines 108 and 117 are absent from this listing; they
// presumably declare `Loc` and construct the remark `R` -- verify against the
// real source before relying on this text.
103static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
104 const AllocaInst &Alloca,
105 TypeSize::ScalarTy StaticSize) {
106 ORE.emit([&] {
107 StringRef DbgName;
109 bool Artificial = false;
  // Look for debug records that declare a variable living in this alloca;
  // only the first record found is consulted.
110 auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
111 if (!DVRs.empty()) {
112 const DbgVariableRecord &DVR = **DVRs.begin();
113 DbgName = DVR.getVariable()->getName();
114 Loc = DVR.getDebugLoc();
115 Artificial = DVR.Variable->isArtificial();
116 }
118 Alloca.getParent());
119 R << "in ";
120 identifyFunction(R, Caller);
121 R << ", ";
122 if (Artificial)
123 R << "artificial ";
  // Render the alloca as an IR operand (no type) to identify it in the remark.
124 SmallString<20> ValName;
125 raw_svector_ostream OS(ValName);
126 Alloca.printAsOperand(OS, /*PrintType=*/false, Caller.getParent());
127 R << "alloca ('" << ValName << "') ";
128 if (!DbgName.empty())
129 R << "for '" << DbgName << "' ";
130 else
131 R << "without debug info ";
132 R << "with ";
  // A zero StaticSize is treated as "size unknown" here.
133 if (StaticSize)
134 R << "static size of " << itostr(StaticSize) << " bytes";
135 else
136 R << "dynamic size";
137 return R;
138 });
139}
140
141static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
142 const CallBase &Call, StringRef CallKind,
143 StringRef RemarkKind) {
144 ORE.emit([&] {
145 OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
146 R << "in ";
147 identifyFunction(R, Caller);
148 R << ", " << CallKind << ", callee is ";
149 identifyCallee(R, Caller.getParent(), Call.getCalledOperand());
150 return R;
151 });
152}
153
155 const Function &Caller,
156 const Instruction &Inst) {
157 ORE.emit([&] {
158 OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
159 R << "in ";
160 identifyFunction(R, Caller);
161 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) {
162 R << ", '" << II->getCalledFunction()->getName() << "' call";
163 } else {
164 R << ", '" << Inst.getOpcodeName() << "' instruction";
165 }
166 if (!Inst.getType()->isVoidTy()) {
167 SmallString<20> Name;
168 raw_svector_ostream OS(Name);
169 Inst.printAsOperand(OS, /*PrintType=*/false, Caller.getParent());
170 R << " ('" << Name << "')";
171 }
172 R << " accesses memory in flat address space";
173 return R;
174 });
175}
176
// Updates this KernelInfo's counters from the instructions of one basic block
// and emits per-instruction remarks for the allocas and calls it counts.
//
// Each instruction is classified by the first matching case: alloca, call
// (anything derived from CallBase), load, store, atomicrmw, cmpxchg.
//
// NOTE(review): several gutter lines are absent from this listing (178, 196,
// 234, 236, 239, 246, 251, 256, 261). They cover the rest of the signature,
// the condition guarding the `continue` below, and lines inside the
// flat-address-space branches. The comments here describe only what is
// visible; verify the gaps against the real source.
177void KernelInfo::updateForBB(const BasicBlock &BB,
179 const Function &F = *BB.getParent();
180 const Module &M = *F.getParent();
181 const DataLayout &DL = M.getDataLayout();
182 for (const Instruction &I : BB) {
183 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&I)) {
184 ++Allocas;
  // StaticSize stays 0 when getAllocationSize() yields no value; that case
  // is counted in AllocasDyn instead of being added to the size sum.
185 TypeSize::ScalarTy StaticSize = 0;
186 if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) {
187 StaticSize = Size->getFixedValue();
  // The sum is kept in an int64_t, so the size must fit in one.
188 assert(StaticSize <=
189 (TypeSize::ScalarTy)std::numeric_limits<int64_t>::max());
190 AllocasStaticSizeSum += StaticSize;
191 } else {
192 ++AllocasDyn;
193 }
194 remarkAlloca(ORE, F, *Alloca, StaticSize);
195 } else if (const CallBase *Call = dyn_cast<CallBase>(&I)) {
  // NOTE(review): the condition that guards this `continue` (gutter line
  // 196) is not visible in this listing.
197 continue;
  // CallKind is the human-readable description and RemarkKind the remark
  // name; both are built up piece by piece as the call is classified.
198 SmallString<40> CallKind;
199 SmallString<40> RemarkKind;
200 if (Call->isIndirectCall()) {
201 ++IndirectCalls;
202 CallKind += "indirect";
203 RemarkKind += "Indirect";
204 } else {
205 ++DirectCalls;
206 CallKind += "direct";
207 RemarkKind += "Direct";
208 }
209 if (isa<InvokeInst>(Call)) {
210 ++Invokes;
211 CallKind += " invoke";
212 RemarkKind += "Invoke";
213 } else {
214 CallKind += " call";
215 RemarkKind += "Call";
216 }
217 if (!Call->isIndirectCall()) {
218 if (const Function *Callee = Call->getCalledFunction()) {
  // Only non-intrinsic callees that have a definition are counted as
  // "defined functions".
219 if (!Callee->isIntrinsic() && !Callee->isDeclaration()) {
220 ++DirectCallsToDefinedFunctions;
221 CallKind += " to defined function";
222 RemarkKind += "ToDefinedFunction";
223 }
224 } else if (Call->isInlineAsm()) {
225 ++InlineAssemblyCalls;
226 CallKind += " to inline assembly";
227 RemarkKind += "ToInlineAssembly";
228 }
229 }
230 remarkCall(ORE, F, *Call, CallKind, RemarkKind);
  // Memory intrinsics count as a flat-address-space access when their
  // destination address space, or else (for transfer intrinsics) their
  // source address space, equals FlatAddrspace.
231 if (const AnyMemIntrinsic *MI = dyn_cast<AnyMemIntrinsic>(Call)) {
232 if (MI->getDestAddressSpace() == FlatAddrspace) {
233 ++FlatAddrspaceAccesses;
235 } else if (const AnyMemTransferInst *MT =
237 if (MT->getSourceAddressSpace() == FlatAddrspace) {
238 ++FlatAddrspaceAccesses;
240 }
241 }
242 }
  // Plain memory instructions: counted when their pointer operand is in the
  // flat address space.
243 } else if (const LoadInst *Load = dyn_cast<LoadInst>(&I)) {
244 if (Load->getPointerAddressSpace() == FlatAddrspace) {
245 ++FlatAddrspaceAccesses;
247 }
248 } else if (const StoreInst *Store = dyn_cast<StoreInst>(&I)) {
249 if (Store->getPointerAddressSpace() == FlatAddrspace) {
250 ++FlatAddrspaceAccesses;
252 }
253 } else if (const AtomicRMWInst *At = dyn_cast<AtomicRMWInst>(&I)) {
254 if (At->getPointerAddressSpace() == FlatAddrspace) {
255 ++FlatAddrspaceAccesses;
257 }
258 } else if (const AtomicCmpXchgInst *At = dyn_cast<AtomicCmpXchgInst>(&I)) {
259 if (At->getPointerAddressSpace() == FlatAddrspace) {
260 ++FlatAddrspaceAccesses;
262 }
263 }
264 }
265}
266
268 StringRef Name, int64_t Value) {
269 ORE.emit([&] {
271 R << "in ";
273 R << ", " << Name << " = " << itostr(Value);
274 return R;
275 });
276}
277
278static std::optional<int64_t> parseFnAttrAsInteger(Function &F,
279 StringRef Name) {
280 if (!F.hasFnAttribute(Name))
281 return std::nullopt;
282 return F.getFnAttributeAsParsedInteger(Name);
283}
284
285void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
286 TargetMachine *TM) {
287 KernelInfo KI;
288 TargetTransformInfo &TheTTI = FAM.getResult<TargetIRAnalysis>(F);
289 KI.FlatAddrspace = TheTTI.getFlatAddressSpace();
290
291 // Record function properties.
292 KI.ExternalNotKernel = F.hasExternalLinkage() && !F.hasKernelCallingConv();
293 for (StringRef Name : {"omp_target_num_teams", "omp_target_thread_limit"}) {
294 if (auto Val = parseFnAttrAsInteger(F, Name))
295 KI.LaunchBounds.push_back({Name, *Val});
296 }
297 TheTTI.collectKernelLaunchBounds(F, KI.LaunchBounds);
298
299 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
300 for (const auto &BB : F)
301 KI.updateForBB(BB, ORE);
302
303#define REMARK_PROPERTY(PROP_NAME) \
304 remarkProperty(ORE, F, #PROP_NAME, KI.PROP_NAME)
305 REMARK_PROPERTY(ExternalNotKernel);
306 for (auto LB : KI.LaunchBounds)
307 remarkProperty(ORE, F, LB.first, LB.second);
308 REMARK_PROPERTY(Allocas);
309 REMARK_PROPERTY(AllocasStaticSizeSum);
310 REMARK_PROPERTY(AllocasDyn);
311 REMARK_PROPERTY(DirectCalls);
312 REMARK_PROPERTY(IndirectCalls);
313 REMARK_PROPERTY(DirectCallsToDefinedFunctions);
314 REMARK_PROPERTY(InlineAssemblyCalls);
315 REMARK_PROPERTY(Invokes);
316 REMARK_PROPERTY(FlatAddrspaceAccesses);
317#undef REMARK_PROPERTY
318}
319
322 // Skip it if remarks are not enabled as it will do nothing useful.
323 if (F.getContext().getDiagHandlerPtr()->isPassedOptRemarkEnabled(DEBUG_TYPE))
324 KernelInfo::emitKernelInfo(F, AM, TM);
325 return PreservedAnalyses::all();
326}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller, const CallBase &Call, StringRef CallKind, StringRef RemarkKind)
static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller, const AllocaInst &Alloca, TypeSize::ScalarTy StaticSize)
static std::optional< int64_t > parseFnAttrAsInteger(Function &F, StringRef Name)
static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F, StringRef Name, int64_t Value)
static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE, const Function &Caller, const Instruction &Inst)
#define REMARK_PROPERTY(PROP_NAME)
static void identifyCallee(OptimizationRemark &R, const Module *M, const Value *V, StringRef Kind="")
static void identifyFunction(OptimizationRemark &R, const Function &F)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file contains the declarations for metadata subclasses.
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file defines the SmallString class.
This file contains some functions that are useful when dealing with strings.
This pass exposes codegen information to IR-level passes.
an instruction to allocate memory on the stack
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
StringRef getName() const
DebugLoc getDebugLoc() const
Record of a variable value-assignment, aka a non instruction representation of the dbg....
DbgRecordParamRef< DILocalVariable > Variable
DILocalVariable * getVariable() const
A debug info location.
Definition DebugLoc.h:123
const char * getOpcodeName() const
A wrapper class for inspecting calls to intrinsic functions.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
Primary interface to the complete machine description for the target machine.
LLVM_ABI unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
LLVM_ABI void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const
Collect kernel launch bounds for F into LB.
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
const ParentTy * getParent() const
Definition ilist_node.h:34
A raw_ostream that writes to an SmallVector or SmallString.
CallInst * Call
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRDeclares(Value *V)
Finds dbg.declare records declaring local variables as living in the memory that 'V' points to.
Definition DebugInfo.cpp:48
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
std::string itostr(int64_t X)