LLVM  15.0.0git
AMDGPUResourceUsageAnalysis.cpp
Go to the documentation of this file.
1 //===- AMDGPUResourceUsageAnalysis.h ---- analysis of resources -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Analyzes how many registers and other resources are used by
11 /// functions.
12 ///
13 /// The results of this analysis are used to fill the register usage, flat
14 /// usage, etc. into hardware registers.
15 ///
16 /// The analysis takes callees into account. E.g. if a function A that needs 10
17 /// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
18 /// will return 20.
19 /// It is assumed that an indirect call can go into any function except
20 /// hardware-entrypoints. Therefore the register usage of functions with
21 /// indirect calls is estimated as the maximum of all non-entrypoint functions
22 /// in the module.
23 ///
24 //===----------------------------------------------------------------------===//
25 
#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 
41 #define DEBUG_TYPE "amdgpu-resource-usage"
42 
45 
46 // We need to tell the runtime some amount ahead of time if we don't know the
47 // true stack size. Assume a smaller number if this is only due to dynamic /
48 // non-entry block allocas.
50  "amdgpu-assume-external-call-stack-size",
51  cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
52  cl::init(16384));
53 
55  "amdgpu-assume-dynamic-stack-object-size",
56  cl::desc("Assumed extra stack use if there are any "
57  "variable sized objects (in bytes)"),
58  cl::Hidden, cl::init(4096));
59 
61  "Function register usage analysis", true, true)
62 
63 static const Function *getCalleeFunction(const MachineOperand &Op) {
64  if (Op.isImm()) {
65  assert(Op.getImm() == 0);
66  return nullptr;
67  }
68  if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
69  return cast<Function>(GA->getOperand(0));
70  return cast<Function>(Op.getGlobal());
71 }
72 
74  const SIInstrInfo &TII, unsigned Reg) {
75  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
76  if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
77  return true;
78  }
79 
80  return false;
81 }
82 
84  const GCNSubtarget &ST) const {
85  return NumExplicitSGPR +
86  IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
87  ST.getTargetID().isXnackOnOrAny());
88 }
89 
91  const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
92  return AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), ArgNumAGPR, ArgNumVGPR);
93 }
94 
96  const GCNSubtarget &ST) const {
97  return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
98 }
99 
101  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
102  if (!TPC)
103  return false;
104 
105  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
106  const TargetMachine &TM = TPC->getTM<TargetMachine>();
107  bool HasIndirectCall = false;
108 
109  CallGraph CG = CallGraph(M);
110  auto End = po_end(&CG);
111 
112  for (auto IT = po_begin(&CG); IT != End; ++IT) {
113  Function *F = IT->getFunction();
114  if (!F || F->isDeclaration())
115  continue;
116 
118  assert(MF && "function must have been generated already");
119 
120  auto CI = CallGraphResourceInfo.insert(
121  std::make_pair(F, SIFunctionResourceInfo()));
122  SIFunctionResourceInfo &Info = CI.first->second;
123  assert(CI.second && "should only be called once per function");
124  Info = analyzeResourceUsage(*MF, TM);
125  HasIndirectCall |= Info.HasIndirectCall;
126  }
127 
128  if (HasIndirectCall)
129  propagateIndirectCallRegisterUsage();
130 
131  return false;
132 }
133 
135 AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
136  const MachineFunction &MF, const TargetMachine &TM) const {
137  SIFunctionResourceInfo Info;
138 
140  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
141  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
142  const MachineRegisterInfo &MRI = MF.getRegInfo();
143  const SIInstrInfo *TII = ST.getInstrInfo();
144  const SIRegisterInfo &TRI = TII->getRegisterInfo();
145 
146  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
147  MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
150 
151  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
152  // instructions aren't used to access the scratch buffer. Inline assembly may
153  // need it though.
154  //
155  // If we only have implicit uses of flat_scr on flat instructions, it is not
156  // really needed.
157  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
158  (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
159  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
160  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
161  Info.UsesFlatScratch = false;
162  }
163 
164  Info.PrivateSegmentSize = FrameInfo.getStackSize();
165 
166  // Assume a big number if there are any unknown sized objects.
167  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
168  if (Info.HasDynamicallySizedStack)
169  Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
170 
171  if (MFI->isStackRealigned())
172  Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
173 
174  Info.UsesVCC =
175  MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
176 
177  // If there are no calls, MachineRegisterInfo can tell us the used register
178  // count easily.
179  // A tail call isn't considered a call for MachineFrameInfo's purposes.
180  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
181  MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
182  for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
183  if (MRI.isPhysRegUsed(Reg)) {
184  HighestVGPRReg = Reg;
185  break;
186  }
187  }
188 
189  if (ST.hasMAIInsts()) {
190  MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
191  for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
192  if (MRI.isPhysRegUsed(Reg)) {
193  HighestAGPRReg = Reg;
194  break;
195  }
196  }
197  Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
198  ? 0
199  : TRI.getHWRegIndex(HighestAGPRReg) + 1;
200  }
201 
202  MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
203  for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
204  if (MRI.isPhysRegUsed(Reg)) {
205  HighestSGPRReg = Reg;
206  break;
207  }
208  }
209 
210  // We found the maximum register index. They start at 0, so add one to get
211  // the number of registers.
212  Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
213  ? 0
214  : TRI.getHWRegIndex(HighestVGPRReg) + 1;
215  Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
216  ? 0
217  : TRI.getHWRegIndex(HighestSGPRReg) + 1;
218 
219  return Info;
220  }
221 
222  int32_t MaxVGPR = -1;
223  int32_t MaxAGPR = -1;
224  int32_t MaxSGPR = -1;
225  uint64_t CalleeFrameSize = 0;
226 
227  for (const MachineBasicBlock &MBB : MF) {
228  for (const MachineInstr &MI : MBB) {
229  // TODO: Check regmasks? Do they occur anywhere except calls?
230  for (const MachineOperand &MO : MI.operands()) {
231  unsigned Width = 0;
232  bool IsSGPR = false;
233  bool IsAGPR = false;
234 
235  if (!MO.isReg())
236  continue;
237 
238  Register Reg = MO.getReg();
239  switch (Reg) {
240  case AMDGPU::EXEC:
241  case AMDGPU::EXEC_LO:
242  case AMDGPU::EXEC_HI:
243  case AMDGPU::SCC:
244  case AMDGPU::M0:
245  case AMDGPU::M0_LO16:
246  case AMDGPU::M0_HI16:
247  case AMDGPU::SRC_SHARED_BASE:
248  case AMDGPU::SRC_SHARED_LIMIT:
249  case AMDGPU::SRC_PRIVATE_BASE:
250  case AMDGPU::SRC_PRIVATE_LIMIT:
251  case AMDGPU::SGPR_NULL:
252  case AMDGPU::SGPR_NULL64:
253  case AMDGPU::MODE:
254  continue;
255 
256  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
257  llvm_unreachable("src_pops_exiting_wave_id should not be used");
258 
259  case AMDGPU::NoRegister:
260  assert(MI.isDebugInstr() &&
261  "Instruction uses invalid noreg register");
262  continue;
263 
264  case AMDGPU::VCC:
265  case AMDGPU::VCC_LO:
266  case AMDGPU::VCC_HI:
267  case AMDGPU::VCC_LO_LO16:
268  case AMDGPU::VCC_LO_HI16:
269  case AMDGPU::VCC_HI_LO16:
270  case AMDGPU::VCC_HI_HI16:
271  Info.UsesVCC = true;
272  continue;
273 
274  case AMDGPU::FLAT_SCR:
275  case AMDGPU::FLAT_SCR_LO:
276  case AMDGPU::FLAT_SCR_HI:
277  continue;
278 
279  case AMDGPU::XNACK_MASK:
280  case AMDGPU::XNACK_MASK_LO:
281  case AMDGPU::XNACK_MASK_HI:
282  llvm_unreachable("xnack_mask registers should not be used");
283 
284  case AMDGPU::LDS_DIRECT:
285  llvm_unreachable("lds_direct register should not be used");
286 
287  case AMDGPU::TBA:
288  case AMDGPU::TBA_LO:
289  case AMDGPU::TBA_HI:
290  case AMDGPU::TMA:
291  case AMDGPU::TMA_LO:
292  case AMDGPU::TMA_HI:
293  llvm_unreachable("trap handler registers should not be used");
294 
295  case AMDGPU::SRC_VCCZ:
296  llvm_unreachable("src_vccz register should not be used");
297 
298  case AMDGPU::SRC_EXECZ:
299  llvm_unreachable("src_execz register should not be used");
300 
301  case AMDGPU::SRC_SCC:
302  llvm_unreachable("src_scc register should not be used");
303 
304  default:
305  break;
306  }
307 
308  if (AMDGPU::SReg_32RegClass.contains(Reg) ||
309  AMDGPU::SReg_LO16RegClass.contains(Reg) ||
310  AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
311  assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
312  "trap handler registers should not be used");
313  IsSGPR = true;
314  Width = 1;
315  } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
316  AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
317  AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
318  IsSGPR = false;
319  Width = 1;
320  } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
321  AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
322  IsSGPR = false;
323  IsAGPR = true;
324  Width = 1;
325  } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
326  assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
327  "trap handler registers should not be used");
328  IsSGPR = true;
329  Width = 2;
330  } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
331  IsSGPR = false;
332  Width = 2;
333  } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
334  IsSGPR = false;
335  IsAGPR = true;
336  Width = 2;
337  } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
338  IsSGPR = false;
339  Width = 3;
340  } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
341  IsSGPR = true;
342  Width = 3;
343  } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
344  IsSGPR = false;
345  IsAGPR = true;
346  Width = 3;
347  } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
348  assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
349  "trap handler registers should not be used");
350  IsSGPR = true;
351  Width = 4;
352  } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
353  IsSGPR = false;
354  Width = 4;
355  } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
356  IsSGPR = false;
357  IsAGPR = true;
358  Width = 4;
359  } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
360  IsSGPR = false;
361  Width = 5;
362  } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
363  IsSGPR = true;
364  Width = 5;
365  } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
366  IsSGPR = false;
367  IsAGPR = true;
368  Width = 5;
369  } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
370  IsSGPR = false;
371  Width = 6;
372  } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
373  IsSGPR = true;
374  Width = 6;
375  } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
376  IsSGPR = false;
377  IsAGPR = true;
378  Width = 6;
379  } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
380  IsSGPR = false;
381  Width = 7;
382  } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
383  IsSGPR = true;
384  Width = 7;
385  } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
386  IsSGPR = false;
387  IsAGPR = true;
388  Width = 7;
389  } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
390  assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
391  "trap handler registers should not be used");
392  IsSGPR = true;
393  Width = 8;
394  } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
395  IsSGPR = false;
396  Width = 8;
397  } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
398  IsSGPR = false;
399  IsAGPR = true;
400  Width = 8;
401  } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
402  assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
403  "trap handler registers should not be used");
404  IsSGPR = true;
405  Width = 16;
406  } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
407  IsSGPR = false;
408  Width = 16;
409  } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
410  IsSGPR = false;
411  IsAGPR = true;
412  Width = 16;
413  } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
414  IsSGPR = true;
415  Width = 32;
416  } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
417  IsSGPR = false;
418  Width = 32;
419  } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
420  IsSGPR = false;
421  IsAGPR = true;
422  Width = 32;
423  } else {
424  llvm_unreachable("Unknown register class");
425  }
426  unsigned HWReg = TRI.getHWRegIndex(Reg);
427  int MaxUsed = HWReg + Width - 1;
428  if (IsSGPR) {
429  MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
430  } else if (IsAGPR) {
431  MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
432  } else {
433  MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
434  }
435  }
436 
437  if (MI.isCall()) {
438  // Pseudo used just to encode the underlying global. Is there a better
439  // way to track this?
440 
441  const MachineOperand *CalleeOp =
442  TII->getNamedOperand(MI, AMDGPU::OpName::callee);
443 
444  const Function *Callee = getCalleeFunction(*CalleeOp);
446  CallGraphResourceInfo.end();
447 
448  // Avoid crashing on undefined behavior with an illegal call to a
449  // kernel. If a callsite's calling convention doesn't match the
450  // function's, it's undefined behavior. If the callsite calling
451  // convention does match, that would have errored earlier.
452  if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
453  report_fatal_error("invalid call to entry function");
454 
455  bool IsIndirect = !Callee || Callee->isDeclaration();
456  if (!IsIndirect)
457  I = CallGraphResourceInfo.find(Callee);
458 
459  // FIXME: Call site could have norecurse on it
460  if (!Callee || !Callee->doesNotRecurse()) {
461  Info.HasRecursion = true;
462 
463  // TODO: If we happen to know there is no stack usage in the
464  // callgraph, we don't need to assume an infinitely growing stack.
465  if (!MI.isReturn()) {
466  // We don't need to assume an unknown stack size for tail calls.
467 
468  // FIXME: This only benefits in the case where the kernel does not
469  // directly call the tail called function. If a kernel directly
470  // calls a tail recursive function, we'll assume maximum stack size
471  // based on the regular call instruction.
472  CalleeFrameSize =
473  std::max(CalleeFrameSize,
475  }
476  }
477 
478  if (IsIndirect || I == CallGraphResourceInfo.end()) {
479  CalleeFrameSize =
480  std::max(CalleeFrameSize,
482 
483  // Register usage of indirect calls gets handled later
484  Info.UsesVCC = true;
485  Info.UsesFlatScratch = ST.hasFlatAddressSpace();
486  Info.HasDynamicallySizedStack = true;
487  Info.HasIndirectCall = true;
488  } else {
489  // We force CodeGen to run in SCC order, so the callee's register
490  // usage etc. should be the cumulative usage of all callees.
491  MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
492  MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
493  MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
494  CalleeFrameSize =
495  std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
496  Info.UsesVCC |= I->second.UsesVCC;
497  Info.UsesFlatScratch |= I->second.UsesFlatScratch;
498  Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
499  Info.HasRecursion |= I->second.HasRecursion;
500  Info.HasIndirectCall |= I->second.HasIndirectCall;
501  }
502  }
503  }
504  }
505 
506  Info.NumExplicitSGPR = MaxSGPR + 1;
507  Info.NumVGPR = MaxVGPR + 1;
508  Info.NumAGPR = MaxAGPR + 1;
509  Info.PrivateSegmentSize += CalleeFrameSize;
510 
511  return Info;
512 }
513 
514 void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
515  // Collect the maximum number of registers from non-hardware-entrypoints.
516  // All these functions are potential targets for indirect calls.
517  int32_t NonKernelMaxSGPRs = 0;
518  int32_t NonKernelMaxVGPRs = 0;
519  int32_t NonKernelMaxAGPRs = 0;
520 
521  for (const auto &I : CallGraphResourceInfo) {
522  if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
523  auto &Info = I.getSecond();
524  NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
525  NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
526  NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
527  }
528  }
529 
530  // Add register usage for functions with indirect calls.
531  // For calls to unknown functions, we assume the maximum register usage of
532  // all non-hardware-entrypoints in the current module.
533  for (auto &I : CallGraphResourceInfo) {
534  auto &Info = I.getSecond();
535  if (Info.HasIndirectCall) {
536  Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
537  Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
538  Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
539  }
540  }
541 }
llvm::AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT
@ FLAT_SCRATCH_INIT
Definition: AMDGPUArgumentUsageInfo.h:105
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::MachineRegisterInfo::isPhysRegUsed
bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
Definition: MachineRegisterInfo.cpp:581
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::SIMachineFunctionInfo::getPreloadedReg
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Definition: SIMachineFunctionInfo.h:738
SIMachineFunctionInfo.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::Function
Definition: Function.h:60
llvm::po_end
po_iterator< T > po_end(const T &G)
Definition: PostOrderIterator.h:187
contains
return AArch64::GPR64RegClass contains(Reg)
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:72
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
IT
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::AMDGPU::IsaInfo::getNumExtraSGPRs
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
Definition: AMDGPUBaseInfo.cpp:742
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::AMDGPUResourceUsageAnalysisID
char & AMDGPUResourceUsageAnalysisID
Definition: AMDGPUResourceUsageAnalysis.cpp:44
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
F
#define F(x, y, z)
Definition: MD5.cpp:55
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUResourceUsageAnalysis.cpp:41
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:666
hasAnyNonFlatUseOfReg
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
Definition: AMDGPUResourceUsageAnalysis.cpp:73
GlobalValue.h
TargetMachine.h
llvm::po_begin
po_iterator< T > po_begin(const T &G)
Definition: PostOrderIterator.h:185
GCNSubtarget.h
AssumedStackSizeForDynamicSizeObjects
static cl::opt< uint32_t > AssumedStackSizeForDynamicSizeObjects("amdgpu-assume-dynamic-stack-object-size", cl::desc("Assumed extra stack use if there are any " "variable sized objects (in bytes)"), cl::Hidden, cl::init(4096))
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:754
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::AMDGPU
Definition: AMDGPUMetadataVerifier.h:34
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:369
llvm::MachineModuleInfo
This class contains meta information specific to a module.
Definition: MachineModuleInfo.h:75
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1639
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::cl::opt
Definition: CommandLine.h:1392
AMDGPUResourceUsageAnalysis.h
Analyzes how many registers and other resources are used by functions.
llvm::SIMachineFunctionInfo::isStackRealigned
bool isStackRealigned() const
Definition: SIMachineFunctionInfo.h:830
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
uint64_t
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::DenseMap
Definition: DenseMap.h:716
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::AMDGPUResourceUsageAnalysis::runOnModule
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
Definition: AMDGPUResourceUsageAnalysis.cpp:100
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
TargetPassConfig.h
llvm::MachineRegisterInfo::reg_operands
iterator_range< reg_iterator > reg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:294
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs
int32_t getTotalNumVGPRs(const GCNSubtarget &ST, int32_t NumAGPR, int32_t NumVGPR) const
Definition: AMDGPUResourceUsageAnalysis.cpp:90
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:672
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:304
AMDGPU.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:186
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineRegisterInfo::isLiveIn
bool isLiveIn(Register Reg) const
Definition: MachineRegisterInfo.cpp:432
llvm::MachineModuleInfo::getMachineFunction
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
Definition: MachineModuleInfo.cpp:103
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
MachineFrameInfo.h
llvm::AMDGPU::getTotalNumVGPRs
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
Definition: AMDGPUBaseInfo.cpp:1790
GlobalAlias.h
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:436
llvm::AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs
int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const
Definition: AMDGPUResourceUsageAnalysis.cpp:83
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
CallGraph.h
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:105
PostOrderIterator.h
AssumedStackSizeForExternalCall
static cl::opt< uint32_t > AssumedStackSizeForExternalCall("amdgpu-assume-external-call-stack-size", cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden, cl::init(16384))
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:349
llvm::AMDGPUResourceUsageAnalysis
Definition: AMDGPUResourceUsageAnalysis.h:27
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
callee
Here we don t need to write any variables to the top of the stack since they don t overwrite each other int callee(int32 arg1, int32 arg2)
llvm::cl::desc
Definition: CommandLine.h:405
llvm::AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
Definition: AMDGPUResourceUsageAnalysis.h:32
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE, "Function register usage analysis", true, true) static const Function *getCalleeFunction(const MachineOperand &Op)
Definition: AMDGPUResourceUsageAnalysis.cpp:60
llvm::SIMachineFunctionInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition: SIMachineFunctionInfo.h:681
llvm::AMDGPUResourceUsageAnalysis::ID
static char ID
Definition: AMDGPUResourceUsageAnalysis.h:28