AMDGPUPerfHintAnalysis.cpp (LLVM 15.0.0git)
//===- AMDGPUPerfHintAnalysis.cpp - analysis of functions memory traffic --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes if a function is potentially memory bound and if a kernel
/// may benefit from limiting the number of waves to reduce cache thrashing.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-perf-hint"

static cl::opt<unsigned>
    MemBoundThresh("amdgpu-membound-threshold", cl::init(50), cl::Hidden,
                   cl::desc("Function mem bound threshold in %"));

static cl::opt<unsigned>
    LimitWaveThresh("amdgpu-limit-wave-threshold", cl::init(50), cl::Hidden,
                    cl::desc("Kernel limit wave threshold in %"));

static cl::opt<unsigned>
    IAWeight("amdgpu-indirect-access-weight", cl::init(1000), cl::Hidden,
             cl::desc("Indirect access memory instruction weight"));

static cl::opt<unsigned>
    LSWeight("amdgpu-large-stride-weight", cl::init(1000), cl::Hidden,
             cl::desc("Large stride memory access weight"));

static cl::opt<unsigned>
    LargeStrideThresh("amdgpu-large-stride-threshold", cl::init(64), cl::Hidden,
                      cl::desc("Large stride memory access threshold"));

STATISTIC(NumMemBound, "Number of functions marked as memory bound");
STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");

char llvm::AMDGPUPerfHintAnalysis::ID = 0;
char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;

INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
                "Analysis if a function is memory bound", true, true)

namespace {

struct AMDGPUPerfHint {
  friend AMDGPUPerfHintAnalysis;

public:
  AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
                 const TargetLowering *TLI_)
      : FIM(FIM_), DL(nullptr), TLI(TLI_) {}

  bool runOnFunction(Function &F);

private:
  struct MemAccessInfo {
    const Value *V;
    const Value *Base;
    int64_t Offset;
    MemAccessInfo() : V(nullptr), Base(nullptr), Offset(0) {}
    bool isLargeStride(MemAccessInfo &Reference) const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    Printable print() const {
      return Printable([this](raw_ostream &OS) {
        OS << "Value: " << *V << '\n'
           << "Base: " << *Base << " Offset: " << Offset << '\n';
      });
    }
#endif
  };

  MemAccessInfo makeMemAccessInfo(Instruction *) const;

  MemAccessInfo LastAccess; // Last memory access info

  AMDGPUPerfHintAnalysis::FuncInfoMap &FIM;

  const DataLayout *DL;

  const TargetLowering *TLI;

  AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
  static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
  static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);

  bool isIndirectAccess(const Instruction *Inst) const;

  /// Check if the instruction is a large-stride memory access.
  /// The purpose is to identify memory access patterns like:
  /// x = a[i];
  /// y = a[i+1000];
  /// z = a[i+2000];
  /// In the above example, the second and third memory accesses will be
  /// marked as large-stride memory accesses.
  bool isLargeStride(const Instruction *Inst);

  bool isGlobalAddr(const Value *V) const;
  bool isLocalAddr(const Value *V) const;
  bool isConstantAddr(const Value *V) const;
};

static std::pair<const Value *, const Type *> getMemoryInstrPtrAndType(
    const Instruction *Inst) {
  if (auto LI = dyn_cast<LoadInst>(Inst))
    return {LI->getPointerOperand(), LI->getType()};
  if (auto SI = dyn_cast<StoreInst>(Inst))
    return {SI->getPointerOperand(), SI->getValueOperand()->getType()};
  if (auto AI = dyn_cast<AtomicCmpXchgInst>(Inst))
    return {AI->getPointerOperand(), AI->getCompareOperand()->getType()};
  if (auto AI = dyn_cast<AtomicRMWInst>(Inst))
    return {AI->getPointerOperand(), AI->getValOperand()->getType()};
  if (auto MI = dyn_cast<AnyMemIntrinsic>(Inst))
    return {MI->getRawDest(), Type::getInt8Ty(MI->getContext())};

  return {nullptr, nullptr};
}

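// Walk the address computation of a global/flat access backwards through
// GEPs, casts, binary operators, selects and extractelements. If any step
// loads the pointer (or a value feeding it) from global memory, the access is
// treated as indirect and is later weighted by IAWeight in needLimitWave().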
bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
  LLVM_DEBUG(dbgs() << "[isIndirectAccess] " << *Inst << '\n');
  SmallSet<const Value *, 32> WorkSet;
  SmallSet<const Value *, 32> Visited;
  if (const Value *MO = getMemoryInstrPtrAndType(Inst).first) {
    if (isGlobalAddr(MO))
      WorkSet.insert(MO);
  }

  while (!WorkSet.empty()) {
    const Value *V = *WorkSet.begin();
    WorkSet.erase(*WorkSet.begin());
    if (!Visited.insert(V).second)
      continue;
    LLVM_DEBUG(dbgs() << " check: " << *V << '\n');

    if (auto LD = dyn_cast<LoadInst>(V)) {
      auto M = LD->getPointerOperand();
      if (isGlobalAddr(M)) {
        LLVM_DEBUG(dbgs() << " is IA\n");
        return true;
      }
      continue;
    }

    if (auto GEP = dyn_cast<GetElementPtrInst>(V)) {
      auto P = GEP->getPointerOperand();
      WorkSet.insert(P);
      for (unsigned I = 1, E = GEP->getNumIndices() + 1; I != E; ++I)
        WorkSet.insert(GEP->getOperand(I));
      continue;
    }

    if (auto U = dyn_cast<UnaryInstruction>(V)) {
      WorkSet.insert(U->getOperand(0));
      continue;
    }

    if (auto BO = dyn_cast<BinaryOperator>(V)) {
      WorkSet.insert(BO->getOperand(0));
      WorkSet.insert(BO->getOperand(1));
      continue;
    }

    if (auto S = dyn_cast<SelectInst>(V)) {
      WorkSet.insert(S->getFalseValue());
      WorkSet.insert(S->getTrueValue());
      continue;
    }

    if (auto E = dyn_cast<ExtractElementInst>(V)) {
      WorkSet.insert(E->getVectorOperand());
      continue;
    }

    LLVM_DEBUG(dbgs() << " dropped\n");
  }

  LLVM_DEBUG(dbgs() << " is not IA\n");
  return false;
}

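// Accumulate the cost model for one function. Memory instructions are
// weighted by their access size in dwords; calls fold in the costs already
// recorded for the callee (if any); GEPs whose offset is legal for the
// addressing mode are considered free; every other instruction costs 1.
// LastAccess is reset at each basic block, so large-stride detection only
// compares accesses within the same block.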
AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
  AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];

  LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');

  for (auto &B : F) {
    LastAccess = MemAccessInfo();
    for (auto &I : B) {
      if (const Type *Ty = getMemoryInstrPtrAndType(&I).second) {
        unsigned Size = divideCeil(Ty->getPrimitiveSizeInBits(), 32);
        if (isIndirectAccess(&I))
          FI.IAMInstCost += Size;
        if (isLargeStride(&I))
          FI.LSMInstCost += Size;
        FI.MemInstCost += Size;
        FI.InstCost += Size;
        continue;
      }
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee->isDeclaration()) {
          ++FI.InstCost;
          continue;
        }
        if (&F == Callee) // Handle immediate recursion
          continue;

        auto Loc = FIM.find(Callee);
        if (Loc == FIM.end())
          continue;

        FI.MemInstCost += Loc->second.MemInstCost;
        FI.InstCost += Loc->second.InstCost;
        FI.IAMInstCost += Loc->second.IAMInstCost;
        FI.LSMInstCost += Loc->second.LSMInstCost;
      } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
        TargetLoweringBase::AddrMode AM;
        auto *Ptr = GetPointerBaseWithConstantOffset(GEP, AM.BaseOffs, *DL);
        AM.BaseGV = dyn_cast_or_null<GlobalValue>(const_cast<Value *>(Ptr));
        AM.HasBaseReg = !AM.BaseGV;
        if (TLI->isLegalAddressingMode(*DL, AM, GEP->getResultElementType(),
                                       GEP->getPointerAddressSpace()))
          // Offset will likely be folded into load or store
          continue;
        ++FI.InstCost;
      } else {
        ++FI.InstCost;
      }
    }
  }

  return &FI;
}

bool AMDGPUPerfHint::runOnFunction(Function &F) {
  const Module &M = *F.getParent();
  DL = &M.getDataLayout();

  if (F.hasFnAttribute("amdgpu-wave-limiter") &&
      F.hasFnAttribute("amdgpu-memory-bound"))
    return false;

  const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);

  LLVM_DEBUG(dbgs() << F.getName() << " MemInst cost: " << Info->MemInstCost
                    << '\n'
                    << " IAMInst cost: " << Info->IAMInstCost << '\n'
                    << " LSMInst cost: " << Info->LSMInstCost << '\n'
                    << " TotalInst cost: " << Info->InstCost << '\n');

  bool Changed = false;

  if (isMemBound(*Info)) {
    LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
    NumMemBound++;
    F.addFnAttr("amdgpu-memory-bound", "true");
    Changed = true;
  }

  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
    LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
    NumLimitWave++;
    F.addFnAttr("amdgpu-wave-limiter", "true");
    Changed = true;
  }

  return Changed;
}

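// A function is "memory bound" when memory instructions make up more than
// MemBoundThresh percent of its total instruction cost. The wave-limiting
// heuristic uses the same ratio but re-weights indirect accesses by IAWeight
// and large-stride accesses by LSWeight, so a few such accesses can push a
// kernel over LimitWaveThresh. E.g., with the defaults (50%, 1000, 1000), a
// kernel with InstCost = 10000 and a single indirect access (IAMInstCost = 1)
// already contributes 1 * 1000 * 100 / 10000 = 10% toward the threshold.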
bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  return FI.MemInstCost * 100 / FI.InstCost > MemBoundThresh;
}

bool AMDGPUPerfHint::needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  return ((FI.MemInstCost + FI.IAMInstCost * IAWeight +
           FI.LSMInstCost * LSWeight) * 100 / FI.InstCost) > LimitWaveThresh;
}

bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType())) {
    unsigned As = PT->getAddressSpace();
    // Flat likely points to global too.
    return As == AMDGPUAS::GLOBAL_ADDRESS || As == AMDGPUAS::FLAT_ADDRESS;
  }
  return false;
}

bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType()))
    return PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  return false;
}

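// Compare the current access against the previous one seen in the same basic
// block and remember it for the next query when it has a recognizable base
// pointer.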
bool AMDGPUPerfHint::isLargeStride(const Instruction *Inst) {
  LLVM_DEBUG(dbgs() << "[isLargeStride] " << *Inst << '\n');

  MemAccessInfo MAI = makeMemAccessInfo(const_cast<Instruction *>(Inst));
  bool IsLargeStride = MAI.isLargeStride(LastAccess);
  if (MAI.Base)
    LastAccess = std::move(MAI);

  return IsLargeStride;
}

AMDGPUPerfHint::MemAccessInfo
AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
  MemAccessInfo MAI;
  const Value *MO = getMemoryInstrPtrAndType(Inst).first;

  LLVM_DEBUG(dbgs() << "[isLargeStride] MO: " << *MO << '\n');
  // Do not treat local-addr memory access as large stride.
  if (isLocalAddr(MO))
    return MAI;

  MAI.V = MO;
  MAI.Base = GetPointerBaseWithConstantOffset(MO, MAI.Offset, *DL);
  return MAI;
}

bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType())) {
    unsigned As = PT->getAddressSpace();
    return As == AMDGPUAS::CONSTANT_ADDRESS ||
           As == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  }
  return false;
}

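// Two accesses form a large-stride pair when they share the same base pointer
// and their constant byte offsets differ by more than LargeStrideThresh
// (64 by default, i.e. typically more than a cache line apart).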
bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
    MemAccessInfo &Reference) const {

  if (!Base || !Reference.Base || Base != Reference.Base)
    return false;

  uint64_t Diff = Offset > Reference.Offset ? Offset - Reference.Offset
                                            : Reference.Offset - Offset;
  bool Result = Diff > LargeStrideThresh;
  LLVM_DEBUG(dbgs() << "[isLargeStride compare]\n"
                    << print() << "<=>\n"
                    << Reference.print() << "Result:" << Result << '\n');
  return Result;
}
} // namespace

bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const TargetMachine &TM = TPC->getTM<TargetMachine>();

  bool Changed = false;
  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
    AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());

    if (Analyzer.runOnFunction(*F))
      Changed = true;
  }

  return Changed;
}

bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
  auto FI = FIM.find(F);
  if (FI == FIM.end())
    return false;

  return AMDGPUPerfHint::isMemBound(FI->second);
}

bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
  auto FI = FIM.find(F);
  if (FI == FIM.end())
    return false;

  return AMDGPUPerfHint::needLimitWave(FI->second);
}