AMDGPUPerfHintAnalysis.cpp
//===- AMDGPUPerfHintAnalysis.cpp - analysis of functions memory traffic --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes whether a function is potentially memory bound and whether
/// a kernel may benefit from limiting the number of waves to reduce cache
/// thrashing.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-perf-hint"

static cl::opt<unsigned>
    MemBoundThresh("amdgpu-membound-threshold", cl::init(50), cl::Hidden,
                   cl::desc("Function mem bound threshold in %"));

static cl::opt<unsigned>
    LimitWaveThresh("amdgpu-limit-wave-threshold", cl::init(50), cl::Hidden,
                    cl::desc("Kernel limit wave threshold in %"));

static cl::opt<unsigned>
    IAWeight("amdgpu-indirect-access-weight", cl::init(1000), cl::Hidden,
             cl::desc("Indirect access memory instruction weight"));

static cl::opt<unsigned>
    LSWeight("amdgpu-large-stride-weight", cl::init(1000), cl::Hidden,
             cl::desc("Large stride memory access weight"));

static cl::opt<unsigned>
    LargeStrideThresh("amdgpu-large-stride-threshold", cl::init(64), cl::Hidden,
                      cl::desc("Large stride memory access threshold"));

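// Note: all of the options above are cl::Hidden, but like any cl::opt they can
// still be overridden on the llc/opt command line (or from clang via -mllvm),
// e.g. llc -mtriple=amdgcn -amdgpu-membound-threshold=60 kernel.ll. The
// -mtriple and file name here are only illustrative.
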
STATISTIC(NumMemBound, "Number of functions marked as memory bound");
STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");

char llvm::AMDGPUPerfHintAnalysis::ID = 0;
char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;

INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
                "Analysis if a function is memory bound", true, true)

namespace {

struct AMDGPUPerfHint {
  friend AMDGPUPerfHintAnalysis;

public:
  AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
                 const TargetLowering *TLI_)
      : FIM(FIM_), DL(nullptr), TLI(TLI_) {}

  bool runOnFunction(Function &F);

private:
  struct MemAccessInfo {
    const Value *V;
    const Value *Base;
    int64_t Offset;
    MemAccessInfo() : V(nullptr), Base(nullptr), Offset(0) {}
    bool isLargeStride(MemAccessInfo &Reference) const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    Printable print() const {
      return Printable([this](raw_ostream &OS) {
        OS << "Value: " << *V << '\n'
           << "Base: " << *Base << " Offset: " << Offset << '\n';
      });
    }
#endif
  };

  MemAccessInfo makeMemAccessInfo(Instruction *) const;

  MemAccessInfo LastAccess; // Last memory access info

  AMDGPUPerfHintAnalysis::FuncInfoMap &FIM;

  const DataLayout *DL;

  const TargetLowering *TLI;

  AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
  static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
  static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);

  bool isIndirectAccess(const Instruction *Inst) const;

  /// Check if the instruction is a large stride access.
  /// The purpose is to identify memory access patterns like:
  /// x = a[i];
  /// y = a[i+1000];
  /// z = a[i+2000];
  /// In the above example, the second and third memory accesses will be marked
  /// as large stride memory accesses.
  bool isLargeStride(const Instruction *Inst);

  bool isGlobalAddr(const Value *V) const;
  bool isLocalAddr(const Value *V) const;
  bool isConstantAddr(const Value *V) const;
};

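// Return the pointer operand of a memory-accessing instruction (load, store,
// atomic cmpxchg/rmw, or memory intrinsic), or nullptr if the instruction does
// not access memory through a pointer operand.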
static const Value *getMemoryInstrPtr(const Instruction *Inst) {
  if (auto LI = dyn_cast<LoadInst>(Inst)) {
    return LI->getPointerOperand();
  }
  if (auto SI = dyn_cast<StoreInst>(Inst)) {
    return SI->getPointerOperand();
  }
  if (auto AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    return AI->getPointerOperand();
  }
  if (auto AI = dyn_cast<AtomicRMWInst>(Inst)) {
    return AI->getPointerOperand();
  }
  if (auto MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
    return MI->getRawDest();
  }

  return nullptr;
}

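// An access is considered indirect when the address it uses is itself
// (transitively) produced by a load from global, local, or constant memory.
// The worklist below walks the address computation backwards through GEPs,
// unary/binary operators, selects, and extractelements to find such a load.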
bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
  LLVM_DEBUG(dbgs() << "[isIndirectAccess] " << *Inst << '\n');
  SmallSet<const Value *, 32> WorkSet;
  SmallSet<const Value *, 32> Visited;
  if (const Value *MO = getMemoryInstrPtr(Inst)) {
    if (isGlobalAddr(MO))
      WorkSet.insert(MO);
  }

  while (!WorkSet.empty()) {
    const Value *V = *WorkSet.begin();
    WorkSet.erase(*WorkSet.begin());
    if (!Visited.insert(V).second)
      continue;
    LLVM_DEBUG(dbgs() << "  check: " << *V << '\n');

    if (auto LD = dyn_cast<LoadInst>(V)) {
      auto M = LD->getPointerOperand();
      if (isGlobalAddr(M) || isLocalAddr(M) || isConstantAddr(M)) {
        LLVM_DEBUG(dbgs() << "    is IA\n");
        return true;
      }
      continue;
    }

    if (auto GEP = dyn_cast<GetElementPtrInst>(V)) {
      auto P = GEP->getPointerOperand();
      WorkSet.insert(P);
      for (unsigned I = 1, E = GEP->getNumIndices() + 1; I != E; ++I)
        WorkSet.insert(GEP->getOperand(I));
      continue;
    }

    if (auto U = dyn_cast<UnaryInstruction>(V)) {
      WorkSet.insert(U->getOperand(0));
      continue;
    }

    if (auto BO = dyn_cast<BinaryOperator>(V)) {
      WorkSet.insert(BO->getOperand(0));
      WorkSet.insert(BO->getOperand(1));
      continue;
    }

    if (auto S = dyn_cast<SelectInst>(V)) {
      WorkSet.insert(S->getFalseValue());
      WorkSet.insert(S->getTrueValue());
      continue;
    }

    if (auto E = dyn_cast<ExtractElementInst>(V)) {
      WorkSet.insert(E->getVectorOperand());
      continue;
    }

    LLVM_DEBUG(dbgs() << "    dropped\n");
  }

  LLVM_DEBUG(dbgs() << "  is not IA\n");
  return false;
}

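// Summarize one function: walk every instruction, charging memory operations
// by their size in dwords and recording extra cost for indirect and
// large-stride accesses. Calls to functions already summarized in FIM fold in
// the callee's costs; GEPs whose constant offset folds into a legal addressing
// mode are treated as free.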
AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
  AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];

  LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');

  for (auto &B : F) {
    LastAccess = MemAccessInfo();
    for (auto &I : B) {
      if (const Value *Ptr = getMemoryInstrPtr(&I)) {
        unsigned Size = divideCeil(
            Ptr->getType()->getPointerElementType()->getPrimitiveSizeInBits(),
            32);
        if (isIndirectAccess(&I))
          FI.IAMInstCost += Size;
        if (isLargeStride(&I))
          FI.LSMInstCost += Size;
        FI.MemInstCost += Size;
        FI.InstCost += Size;
        continue;
      }
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee->isDeclaration()) {
          ++FI.InstCost;
          continue;
        }
        if (&F == Callee) // Handle immediate recursion
          continue;

        auto Loc = FIM.find(Callee);
        if (Loc == FIM.end())
          continue;

        FI.MemInstCost += Loc->second.MemInstCost;
        FI.InstCost += Loc->second.InstCost;
        FI.IAMInstCost += Loc->second.IAMInstCost;
        FI.LSMInstCost += Loc->second.LSMInstCost;
      } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
        TargetLoweringBase::AddrMode AM;
        auto *Ptr = GetPointerBaseWithConstantOffset(GEP, AM.BaseOffs, *DL);
        AM.BaseGV = dyn_cast_or_null<GlobalValue>(const_cast<Value *>(Ptr));
        AM.HasBaseReg = !AM.BaseGV;
        if (TLI->isLegalAddressingMode(*DL, AM, GEP->getResultElementType(),
                                       GEP->getPointerAddressSpace()))
          // Offset will likely be folded into load or store
          continue;
        ++FI.InstCost;
      } else {
        ++FI.InstCost;
      }
    }
  }

  return &FI;
}
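
// Analyze one function: skip it if it already carries both hint attributes;
// otherwise compute its cost summary and attach "amdgpu-memory-bound" and, for
// entry functions (kernels), "amdgpu-wave-limiter" as appropriate.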
bool AMDGPUPerfHint::runOnFunction(Function &F) {
  const Module &M = *F.getParent();
  DL = &M.getDataLayout();

  if (F.hasFnAttribute("amdgpu-wave-limiter") &&
      F.hasFnAttribute("amdgpu-memory-bound"))
    return false;

  const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);

  LLVM_DEBUG(dbgs() << F.getName() << " MemInst cost: " << Info->MemInstCost
                    << '\n'
                    << " IAMInst cost: " << Info->IAMInstCost << '\n'
                    << " LSMInst cost: " << Info->LSMInstCost << '\n'
                    << " TotalInst cost: " << Info->InstCost << '\n');

  if (isMemBound(*Info)) {
    LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
    NumMemBound++;
    F.addFnAttr("amdgpu-memory-bound", "true");
  }

  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
    LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
    NumLimitWave++;
    F.addFnAttr("amdgpu-wave-limiter", "true");
  }

  return true;
}
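
// Both predicates compare a cost ratio (in percent) against a threshold. As a
// worked example with hypothetical numbers: InstCost = 200, MemInstCost = 120,
// IAMInstCost = 2, LSMInstCost = 0 gives
//   isMemBound:    120 * 100 / 200 = 60 > 50 (MemBoundThresh)
//   needLimitWave: (120 + 2*1000 + 0*1000) * 100 / 200 = 1060 > 50 (LimitWaveThresh)
// so both predicates would return true.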
bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  return FI.MemInstCost * 100 / FI.InstCost > MemBoundThresh;
}

bool AMDGPUPerfHint::needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  return ((FI.MemInstCost + FI.IAMInstCost * IAWeight +
           FI.LSMInstCost * LSWeight) * 100 / FI.InstCost) > LimitWaveThresh;
}

bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType())) {
    unsigned As = PT->getAddressSpace();
    // Flat likely points to global too.
    return As == AMDGPUAS::GLOBAL_ADDRESS || As == AMDGPUAS::FLAT_ADDRESS;
  }
  return false;
}

bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType()))
    return PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  return false;
}

bool AMDGPUPerfHint::isLargeStride(const Instruction *Inst) {
  LLVM_DEBUG(dbgs() << "[isLargeStride] " << *Inst << '\n');

  MemAccessInfo MAI = makeMemAccessInfo(const_cast<Instruction *>(Inst));
  bool IsLargeStride = MAI.isLargeStride(LastAccess);
  if (MAI.Base)
    LastAccess = std::move(MAI);

  return IsLargeStride;
}

AMDGPUPerfHint::MemAccessInfo
AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
  MemAccessInfo MAI;
  const Value *MO = getMemoryInstrPtr(Inst);

  LLVM_DEBUG(dbgs() << "[isLargeStride] MO: " << *MO << '\n');
  // Do not treat local-addr memory access as large stride.
  if (isLocalAddr(MO))
    return MAI;

  MAI.V = MO;
  MAI.Base = GetPointerBaseWithConstantOffset(MO, MAI.Offset, *DL);
  return MAI;
}

bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType())) {
    unsigned As = PT->getAddressSpace();
    return As == AMDGPUAS::CONSTANT_ADDRESS ||
           As == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  }
  return false;
}

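// Two accesses are compared only when they share the same base pointer; the
// stride is then the absolute difference of their byte offsets. For example
// (illustrative only), loads of a[i] and a[i+1000] from an i32 array have
// offsets 4000 bytes apart, which exceeds the default threshold of 64, so the
// second access is counted as large stride.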
bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
    MemAccessInfo &Reference) const {

  if (!Base || !Reference.Base || Base != Reference.Base)
    return false;

  uint64_t Diff = Offset > Reference.Offset ? Offset - Reference.Offset
                                            : Reference.Offset - Offset;
  bool Result = Diff > LargeStrideThresh;
  LLVM_DEBUG(dbgs() << "[isLargeStride compare]\n"
                    << print() << "<=>\n"
                    << Reference.print() << "Result:" << Result << '\n');
  return Result;
}
} // namespace
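
// The analysis runs as a CallGraphSCCPass, so callees are processed before
// their callers and visit() can fold an already-summarized callee's costs
// (stored in FIM) into each caller.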
bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const TargetMachine &TM = TPC->getTM<TargetMachine>();

  bool Changed = false;
  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
    AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());

    if (Analyzer.runOnFunction(*F))
      Changed = true;
  }

  return Changed;
}

bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
  auto FI = FIM.find(F);
  if (FI == FIM.end())
    return false;

  return AMDGPUPerfHint::isMemBound(FI->second);
}

bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
  auto FI = FIM.find(F);
  if (FI == FIM.end())
    return false;

  return AMDGPUPerfHint::needLimitWave(FI->second);
}