AMDGPUPerfHintAnalysis.cpp
//===- AMDGPUPerfHintAnalysis.cpp - analysis of functions memory traffic --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes if a function is potentially memory bound and if a kernel
/// may benefit from limiting the number of waves to reduce cache thrashing.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-perf-hint"

static cl::opt<unsigned>
    MemBoundThresh("amdgpu-membound-threshold", cl::init(50), cl::Hidden,
                   cl::desc("Function mem bound threshold in %"));

static cl::opt<unsigned>
    LimitWaveThresh("amdgpu-limit-wave-threshold", cl::init(50), cl::Hidden,
                    cl::desc("Kernel limit wave threshold in %"));

static cl::opt<unsigned>
    IAWeight("amdgpu-indirect-access-weight", cl::init(1000), cl::Hidden,
             cl::desc("Indirect access memory instruction weight"));

static cl::opt<unsigned>
    LSWeight("amdgpu-large-stride-weight", cl::init(1000), cl::Hidden,
             cl::desc("Large stride memory access weight"));

static cl::opt<unsigned>
    LargeStrideThresh("amdgpu-large-stride-threshold", cl::init(64), cl::Hidden,
                      cl::desc("Large stride memory access threshold"));

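// The thresholds and weights above are hidden llvm::cl options, so they can be
// overridden from the command line of any tool that links in this pass. A
// hedged illustration only (the values are arbitrary, not a recommendation):
//   llc -mtriple=amdgcn -amdgpu-membound-threshold=30 -amdgpu-large-stride-threshold=128 ...
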
STATISTIC(NumMemBound, "Number of functions marked as memory bound");
STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");

char llvm::AMDGPUPerfHintAnalysis::ID = 0;
char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;

INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
                "Analysis if a function is memory bound", true, true)

namespace {

struct AMDGPUPerfHint {
  friend AMDGPUPerfHintAnalysis;

public:
  AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
                 const TargetLowering *TLI_)
      : FIM(FIM_), DL(nullptr), TLI(TLI_) {}

  bool runOnFunction(Function &F);

private:
  struct MemAccessInfo {
    const Value *V;
    const Value *Base;
    int64_t Offset;
    MemAccessInfo() : V(nullptr), Base(nullptr), Offset(0) {}
    bool isLargeStride(MemAccessInfo &Reference) const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    Printable print() const {
      return Printable([this](raw_ostream &OS) {
        OS << "Value: " << *V << '\n'
           << "Base: " << *Base << " Offset: " << Offset << '\n';
      });
    }
#endif
  };

  MemAccessInfo makeMemAccessInfo(Instruction *) const;

  MemAccessInfo LastAccess; // Last memory access info

  AMDGPUPerfHintAnalysis::FuncInfoMap &FIM;

  const DataLayout *DL;

  const TargetLowering *TLI;

  AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
  static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
  static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);

  bool isIndirectAccess(const Instruction *Inst) const;

  /// Check if the instruction is a large stride memory access.
  /// The purpose is to identify memory access patterns like:
  ///   x = a[i];
  ///   y = a[i + 1000];
  ///   z = a[i + 2000];
  /// In the above example, the second and third memory accesses will be marked
  /// as large stride memory accesses.
  bool isLargeStride(const Instruction *Inst);

  bool isGlobalAddr(const Value *V) const;
  bool isLocalAddr(const Value *V) const;
  bool isConstantAddr(const Value *V) const;
};

// Return the pointer operand if Inst reads or writes memory, otherwise
// nullptr.
static const Value *getMemoryInstrPtr(const Instruction *Inst) {
  if (auto LI = dyn_cast<LoadInst>(Inst)) {
    return LI->getPointerOperand();
  }
  if (auto SI = dyn_cast<StoreInst>(Inst)) {
    return SI->getPointerOperand();
  }
  if (auto AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    return AI->getPointerOperand();
  }
  if (auto AI = dyn_cast<AtomicRMWInst>(Inst)) {
    return AI->getPointerOperand();
  }
  if (auto MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
    return MI->getRawDest();
  }

  return nullptr;
}

// Walk backwards through the address computation of a global/flat memory
// access and report an indirect access if the address depends on a value that
// is itself loaded from memory.
bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
  LLVM_DEBUG(dbgs() << "[isIndirectAccess] " << *Inst << '\n');
  SmallSet<const Value *, 32> WorkSet;
  SmallSet<const Value *, 32> Visited;
  if (const Value *MO = getMemoryInstrPtr(Inst)) {
    if (isGlobalAddr(MO))
      WorkSet.insert(MO);
  }

  while (!WorkSet.empty()) {
    const Value *V = *WorkSet.begin();
    WorkSet.erase(*WorkSet.begin());
    if (!Visited.insert(V).second)
      continue;
    LLVM_DEBUG(dbgs() << "  check: " << *V << '\n');

    if (auto LD = dyn_cast<LoadInst>(V)) {
      auto M = LD->getPointerOperand();
      if (isGlobalAddr(M) || isLocalAddr(M) || isConstantAddr(M)) {
        LLVM_DEBUG(dbgs() << "    is IA\n");
        return true;
      }
      continue;
    }

    if (auto GEP = dyn_cast<GetElementPtrInst>(V)) {
      auto P = GEP->getPointerOperand();
      WorkSet.insert(P);
      for (unsigned I = 1, E = GEP->getNumIndices() + 1; I != E; ++I)
        WorkSet.insert(GEP->getOperand(I));
      continue;
    }

    if (auto U = dyn_cast<UnaryInstruction>(V)) {
      WorkSet.insert(U->getOperand(0));
      continue;
    }

    if (auto BO = dyn_cast<BinaryOperator>(V)) {
      WorkSet.insert(BO->getOperand(0));
      WorkSet.insert(BO->getOperand(1));
      continue;
    }

    if (auto S = dyn_cast<SelectInst>(V)) {
      WorkSet.insert(S->getFalseValue());
      WorkSet.insert(S->getTrueValue());
      continue;
    }

    if (auto E = dyn_cast<ExtractElementInst>(V)) {
      WorkSet.insert(E->getVectorOperand());
      continue;
    }

    LLVM_DEBUG(dbgs() << "    dropped\n");
  }

  LLVM_DEBUG(dbgs() << "  is not IA\n");
  return false;
}
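
// Illustrative pattern only (not taken from a real kernel): the address of the
// second access below is produced by a load, so that access counts as
// indirect.
//   float *Row = Table[I];   // load of a pointer from global memory
//   float  X   = Row[J];     // access through the loaded pointer -> indirect
// The walk above follows GEPs, unary ops/casts, binary ops, selects and
// extractelement operands to find such a feeding load.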

// Collect per-function counts of memory, indirect-access and large-stride
// instructions, folding in the already-computed counts of known callees.
AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
  AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];

  LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');

  for (auto &B : F) {
    LastAccess = MemAccessInfo();
    for (auto &I : B) {
      if (getMemoryInstrPtr(&I)) {
        if (isIndirectAccess(&I))
          ++FI.IAMInstCount;
        if (isLargeStride(&I))
          ++FI.LSMInstCount;
        ++FI.MemInstCount;
        ++FI.InstCount;
        continue;
      }
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee->isDeclaration()) {
          ++FI.InstCount;
          continue;
        }
        if (&F == Callee) // Handle immediate recursion
          continue;

        auto Loc = FIM.find(Callee);
        if (Loc == FIM.end())
          continue;

        FI.MemInstCount += Loc->second.MemInstCount;
        FI.InstCount += Loc->second.InstCount;
        FI.IAMInstCount += Loc->second.IAMInstCount;
        FI.LSMInstCount += Loc->second.LSMInstCount;
      } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
        TargetLoweringBase::AddrMode AM;
        auto *Ptr = GetPointerBaseWithConstantOffset(GEP, AM.BaseOffs, *DL);
        AM.BaseGV = dyn_cast_or_null<GlobalValue>(const_cast<Value *>(Ptr));
        AM.HasBaseReg = !AM.BaseGV;
        if (TLI->isLegalAddressingMode(*DL, AM, GEP->getResultElementType(),
                                       GEP->getPointerAddressSpace()))
          // Offset will likely be folded into load or store
          continue;
        ++FI.InstCount;
      } else {
        ++FI.InstCount;
      }
    }
  }

  return &FI;
}
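
// Note on the getelementptr case above (illustrative pseudo-IR, not from a
// real kernel): a GEP whose constant offset can be folded into the load/store
// addressing mode is not counted as a separate instruction.
//   %p = getelementptr inbounds float, float addrspace(1)* %base, i64 4
//   %v = load float, float addrspace(1)* %p
// If the target reports base + 16-byte offset as a legal addressing mode, only
// the load contributes to InstCount.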

bool AMDGPUPerfHint::runOnFunction(Function &F) {
  const Module &M = *F.getParent();
  DL = &M.getDataLayout();

  if (F.hasFnAttribute("amdgpu-wave-limiter") &&
      F.hasFnAttribute("amdgpu-memory-bound"))
    return false;

  const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);

  LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Info->MemInstCount
                    << '\n'
                    << " IAMInst: " << Info->IAMInstCount << '\n'
                    << " LSMInst: " << Info->LSMInstCount << '\n'
                    << " TotalInst: " << Info->InstCount << '\n');

  if (isMemBound(*Info)) {
    LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
    NumMemBound++;
    F.addFnAttr("amdgpu-memory-bound", "true");
  }

  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
    LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
    NumLimitWave++;
    F.addFnAttr("amdgpu-wave-limiter", "true");
  }

  return true;
}
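
// Observable result, as a hedged illustration (the kernel name and body are
// made up): a kernel that trips both heuristics ends up carrying both string
// attributes in the IR.
//   define amdgpu_kernel void @k(...) #0 { ... }
//   attributes #0 = { "amdgpu-memory-bound"="true" "amdgpu-wave-limiter"="true" }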

bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  return FI.MemInstCount * 100 / FI.InstCount > MemBoundThresh;
}

bool AMDGPUPerfHint::needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  return ((FI.MemInstCount + FI.IAMInstCount * IAWeight +
           FI.LSMInstCount * LSWeight) *
          100 / FI.InstCount) > LimitWaveThresh;
}
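
// Worked example with the default thresholds and weights (the counts are
// invented for illustration): MemInstCount = 30, IAMInstCount = 1,
// LSMInstCount = 0, InstCount = 200.
//   isMemBound:    30 * 100 / 200 = 15, and 15 > 50 is false
//   needLimitWave: (30 + 1 * 1000 + 0 * 1000) * 100 / 200 = 515, and 515 > 50 is true
// A single indirect access can therefore dominate the wave-limit decision
// because of its large default weight.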

bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType())) {
    unsigned As = PT->getAddressSpace();
    // Flat likely points to global too.
    return As == AMDGPUAS::GLOBAL_ADDRESS || As == AMDGPUAS::FLAT_ADDRESS;
  }
  return false;
}

bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType()))
    return PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  return false;
}

bool AMDGPUPerfHint::isLargeStride(const Instruction *Inst) {
  LLVM_DEBUG(dbgs() << "[isLargeStride] " << *Inst << '\n');

  MemAccessInfo MAI = makeMemAccessInfo(const_cast<Instruction *>(Inst));
  bool IsLargeStride = MAI.isLargeStride(LastAccess);
  if (MAI.Base)
    LastAccess = std::move(MAI);

  return IsLargeStride;
}

AMDGPUPerfHint::MemAccessInfo
AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
  MemAccessInfo MAI;
  const Value *MO = getMemoryInstrPtr(Inst);

  LLVM_DEBUG(dbgs() << "[isLargeStride] MO: " << *MO << '\n');
  // Do not treat local-addr memory access as large stride.
  if (isLocalAddr(MO))
    return MAI;

  MAI.V = MO;
  MAI.Base = GetPointerBaseWithConstantOffset(MO, MAI.Offset, *DL);
  return MAI;
}

bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
  if (auto PT = dyn_cast<PointerType>(V->getType())) {
    unsigned As = PT->getAddressSpace();
    return As == AMDGPUAS::CONSTANT_ADDRESS ||
           As == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  }
  return false;
}

bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
    MemAccessInfo &Reference) const {

  if (!Base || !Reference.Base || Base != Reference.Base)
    return false;

  uint64_t Diff = Offset > Reference.Offset ? Offset - Reference.Offset
                                            : Reference.Offset - Offset;
  bool Result = Diff > LargeStrideThresh;
  LLVM_DEBUG(dbgs() << "[isLargeStride compare]\n"
                    << print() << "<=>\n"
                    << Reference.print() << "Result:" << Result << '\n');
  return Result;
}
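
// Worked example (the byte offsets are invented for illustration): two
// accesses that share a base pointer with offsets 0 and 4096 give
// Diff = 4096 > 64 (the default threshold), so the second access is a large
// stride; offsets 0 and 16 give Diff = 16, which is not.
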
} // namespace

bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const TargetMachine &TM = TPC->getTM<TargetMachine>();

  bool Changed = false;
  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
    AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());

    if (Analyzer.runOnFunction(*F))
      Changed = true;
  }

  return Changed;
}

bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
  auto FI = FIM.find(F);
  if (FI == FIM.end())
    return false;

  return AMDGPUPerfHint::isMemBound(FI->second);
}

bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
  auto FI = FIM.find(F);
  if (FI == FIM.end())
    return false;

  return AMDGPUPerfHint::needLimitWave(FI->second);
}