//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUSubtarget.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/MDBuilder.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"

AMDGPUSubtarget::~AMDGPUSubtarget() = default;

AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                 StringRef GPU, StringRef FS) {
  // Determine default and user-specified characteristics.
  //
  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
  // enabled, but some instructions do not respect them and they run at the
  // double precision rate, so don't enable them by default.
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.

  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");

  if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
    FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";

  // FIXME: I don't think Evergreen has any useful support for denormals, but
  // this should be checked. Should we issue a warning somewhere if someone
  // tries to enable these?
  if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
    FullFS += "+fp64-fp16-denormals,";
  } else {
    FullFS += "-fp32-denormals,";
  }

  FullFS += FS;

  ParseSubtargetFeatures(GPU, FullFS);

  // We don't support FP64 for EG/NI atm.
  assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));

  // Unless +flat-for-global or -flat-for-global is explicitly given, turn on
  // FlatForGlobal for all OSes on VI and newer hardware to avoid assertion
  // failures due to missing ADDR64 variants of MUBUF instructions.
  if (!hasAddr64() && !FS.contains("flat-for-global")) {
    FlatForGlobal = true;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn) {
    if (LocalMemorySize == 0)
      LocalMemorySize = 32768;

    // Do something sensible for an unspecified target.
    if (!HasMovrel && !HasVGPRIndexMode)
      HasMovrel = true;
  }

  return *this;
}

AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                                 const TargetMachine &TM)
  : AMDGPUGenSubtargetInfo(TT, GPU, FS),
    TargetTriple(TT),
    Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
    WavefrontSize(0),
    LocalMemorySize(0),
    LDSBankCount(0),

    FastFMAF32(false),

    DX10Clamp(false),

    DumpCode(false),

    FP64(false),
    FMA(false),
    MIMG_R128(false),
    IsGCN(false),
    CIInsts(false),
    GFX9Insts(false),
    HasMovrel(false),
    HasSDWA(false),
    HasSDWAMac(false),
    HasDPP(false),

    CaymanISA(false),
    CFALUBug(false),
    TexVTXClauseSize(0),

    InstrItins(getInstrItineraryForCPU(GPU)) {
  AS = AMDGPU::getAMDGPUAS(TT);
  initializeSubtargetDependencies(TT, GPU, FS);
}

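// Returns the largest LDS allocation (in bytes) a workgroup can make while
// still allowing NWaves waves to be resident on each execution unit; a single
// wave may use all of local memory.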
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
  const Function &F) const {
  if (NWaves == 1)
    return getLocalMemorySize();
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}

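// Inverse of getMaxLocalMemSizeWithWaveCount: given an LDS usage of Bytes,
// returns how many waves can still run per execution unit, clamped to the
// range [1, getMaxWavesPerEU()].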
unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
  const Function &F) const {
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
  unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
  NumWaves = std::min(NumWaves, MaxWaves);
  NumWaves = std::max(NumWaves, 1u);
  return NumWaves;
}

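// Default flat workgroup size bounds by calling convention: compute kernels
// default to two to four wavefronts (128-256 work items at wave64), graphics
// shaders to at most a single wavefront, and everything else to up to 16
// wavefronts.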
std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
    return std::make_pair(1, getWavefrontSize());
  default:
    return std::make_pair(1, 16 * getWavefrontSize());
  }
}

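// The "amdgpu-flat-work-group-size" function attribute carries the requested
// bounds as a comma-separated integer pair, e.g.
// "amdgpu-flat-work-group-size"="128,256"; invalid or out-of-range requests
// fall back to the defaults above.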
std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
  const Function &F) const {
  // FIXME: 1024 if function.
  // Default minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Default =
    getDefaultFlatWorkGroupSize(F.getCallingConv());

  // TODO: Do not process the "amdgpu-max-work-group-size" attribute once mesa
  // starts using the "amdgpu-flat-work-group-size" attribute.
  Default.second = AMDGPU::getIntegerAttribute(
    F, "amdgpu-max-work-group-size", Default.second);
  Default.first = std::min(Default.first, Default.second);

  // Requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-flat-work-group-size", Default);

  // Make sure the requested minimum does not exceed the requested maximum.
  if (Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate the subtarget's specifications.
  if (Requested.first < getMinFlatWorkGroupSize())
    return Default;
  if (Requested.second > getMaxFlatWorkGroupSize())
    return Default;

  return Requested;
}

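// "amdgpu-waves-per-eu" is likewise an integer pair attribute, with the
// maximum optional, e.g. "amdgpu-waves-per-eu"="2,4" or
// "amdgpu-waves-per-eu"="2".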
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
  const Function &F) const {
  // Default minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());

  // Default/requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);

  // If minimum/maximum flat work group sizes were explicitly requested using
  // the "amdgpu-flat-work-group-size" attribute, then set the default
  // minimum/maximum number of waves per execution unit to the values implied
  // by the requested minimum/maximum flat work group sizes.
  unsigned MinImpliedByFlatWorkGroupSize =
    getMaxWavesPerEU(FlatWorkGroupSizes.second);
  bool RequestedFlatWorkGroupSize = false;

  // TODO: Do not process the "amdgpu-max-work-group-size" attribute once mesa
  // starts using the "amdgpu-flat-work-group-size" attribute.
  if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
      F.hasFnAttribute("amdgpu-flat-work-group-size")) {
    Default.first = MinImpliedByFlatWorkGroupSize;
    RequestedFlatWorkGroupSize = true;
  }

  // Requested minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-waves-per-eu", Default, true);

  // Make sure the requested minimum does not exceed the requested maximum.
  if (Requested.second && Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate the subtarget's specifications.
  if (Requested.first < getMinWavesPerEU() ||
      Requested.first > getMaxWavesPerEU())
    return Default;
  if (Requested.second > getMaxWavesPerEU())
    return Default;

  // Make sure requested values are compatible with the values implied by the
  // requested minimum/maximum flat work group sizes.
  if (RequestedFlatWorkGroupSize &&
      Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default;

  return Requested;
}

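// Attaches !range metadata to a workitem id/size query. For example, for a
// kernel carrying !reqd_work_group_size !{i32 256, i32 1, i32 1}, a call to
// @llvm.amdgcn.workitem.id.x() gets !range !{i32 0, i32 256} (IDs lie in
// [0, 256)), while a dim-0 local-size query gets !range !{i32 256, i32 257}.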
bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
  bool IdQuery = false;

  // If reqd_work_group_size is present it narrows the value down.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    const Function *F = CI->getCalledFunction();
    if (F) {
      unsigned Dim = UINT_MAX;
      switch (F->getIntrinsicID()) {
      case Intrinsic::amdgcn_workitem_id_x:
      case Intrinsic::r600_read_tidig_x:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_x:
        Dim = 0;
        break;
      case Intrinsic::amdgcn_workitem_id_y:
      case Intrinsic::r600_read_tidig_y:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_y:
        Dim = 1;
        break;
      case Intrinsic::amdgcn_workitem_id_z:
      case Intrinsic::r600_read_tidig_z:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_z:
        Dim = 2;
        break;
      default:
        break;
      }
      if (Dim <= 3) {
        if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
          if (Node->getNumOperands() == 3)
            MinSize = MaxSize = mdconst::extract<ConstantInt>(
                                  Node->getOperand(Dim))->getZExtValue();
      }
    }
  }

  if (!MaxSize)
    return false;

  // Range metadata is [Lo, Hi). For an ID query we need to pass the max size
  // as Hi; for a size query we need to pass Hi + 1.
  if (IdQuery)
    MinSize = 0;
  else
    ++MaxSize;

  MDBuilder MDB(I->getContext());
  MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
                                                  APInt(32, MaxSize));
  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
  return true;
}

R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                             const TargetMachine &TM) :
  AMDGPUSubtarget(TT, GPU, FS, TM),
  InstrInfo(*this),
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
  TLInfo(TM, *this) {}

SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                         const TargetMachine &TM)
  : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
    TLInfo(TM, *this) {
  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
  Legalizer.reset(new AMDGPULegalizerInfo());

  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
  InstSelector.reset(new AMDGPUInstructionSelector(
    *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get())));
}

void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                      unsigned NumRegionInstrs) const {
  // Track register pressure so the scheduler can try to decrease
  // pressure once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit().
  Policy.ShouldTrackPressure = true;

  // Enabling both top-down and bottom-up scheduling seems to give us fewer
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

bool SISubtarget::isVGPRSpillingEnabled(const Function &F) const {
  return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}

unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
                                            unsigned ExplicitArgBytes) const {
  unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
  if (ImplicitBytes == 0)
    return ExplicitArgBytes;

  unsigned Alignment = getAlignmentForImplicitArgPtr();
  return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
}

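// Occupancy limits implied by SGPR usage. Each SIMD has a fixed SGPR file
// (800 entries on VI and newer, 512 on earlier GCN parts), so the thresholds
// below are the largest allocations, rounded to the hardware's allocation
// granularity, that still permit the given number of waves.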
unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

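// Occupancy limits implied by VGPR usage: each SIMD has 256 VGPRs allocated
// in groups of four, so e.g. 10 waves require at most floor(256/10) = 25
// VGPRs, rounded down to a multiple of four, i.e. 24.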
unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
  if (VGPRs <= 24)
    return 10;
  if (VGPRs <= 28)
    return 9;
  if (VGPRs <= 32)
    return 8;
  if (VGPRs <= 36)
    return 7;
  if (VGPRs <= 40)
    return 6;
  if (VGPRs <= 48)
    return 5;
  if (VGPRs <= 64)
    return 4;
  if (VGPRs <= 84)
    return 3;
  if (VGPRs <= 128)
    return 2;
  return 1;
}

unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  if (MFI.hasFlatScratchInit()) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

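// The maximum SGPR budget can be narrowed per function with the
// "amdgpu-num-sgpr" attribute, e.g. "amdgpu-num-sgpr"="48"; out-of-range
// requests are ignored, as checked below.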
unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute the maximum number of SGPRs this function can use using the
  // default/requested minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if the maximum number of SGPRs was explicitly requested using the
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure the requested value does not violate the subtarget's
    // specifications.
    if (Requested && (Requested <= getReservedNumSGPRs(MF)))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure the requested value is compatible with the values implied by
    // the default/requested minimum/maximum number of waves per execution
    // unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
                  MaxAddressableNumSGPRs);
}

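// Analogous to the SGPR computation above, driven by the "amdgpu-num-vgpr"
// attribute, e.g. "amdgpu-num-vgpr"="64".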
unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute the maximum number of VGPRs this function can use using the
  // default/requested minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if the maximum number of VGPRs was explicitly requested using the
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-vgpr", MaxNumVGPRs);

    // Make sure the requested value does not violate the subtarget's
    // specifications.
    if (Requested && Requested <= getReservedNumVGPRs(MF))
      Requested = 0;

    // Make sure the requested value is compatible with the values implied by
    // the default/requested minimum/maximum number of waves per execution
    // unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs - getReservedNumVGPRs(MF);
}

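// Post-RA scheduling DAG mutation that chains consecutive memory operations
// of the same kind (VMEM, FLAT, SMRD, or DS) with artificial edges so the
// scheduler cannot move them apart.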
namespace {
struct MemOpClusterMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

    SUnit *SUa = nullptr;
    // Search for two consecutive memory operations and link them
    // to prevent the scheduler from moving them apart.
    // In DAG pre-processing the SUnits are in the original order of
    // the instructions before scheduling.
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MI2 = *SU.getInstr();
      if (!MI2.mayLoad() && !MI2.mayStore()) {
        SUa = nullptr;
        continue;
      }
      if (!SUa) {
        SUa = &SU;
        continue;
      }

      MachineInstr &MI1 = *SUa->getInstr();
      if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
          (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
          (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
          (TII->isDS(MI1) && TII->isDS(MI2))) {
        SU.addPredBarrier(SUa);

        for (const SDep &SI : SU.Preds) {
          if (SI.getSUnit() != SUa)
            SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
        }

        if (&SU != &DAG->ExitSU) {
          for (const SDep &SI : SUa->Succs) {
            if (SI.getSUnit() != &SU)
              SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
          }
        }
      }

      SUa = &SU;
    }
  }
};
} // namespace

void SISubtarget::getPostRAMutations(
  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}