LLVM  7.0.0svn
AMDGPUSubtarget.cpp
Go to the documentation of this file.
1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Implements the AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUSubtarget.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUTargetMachine.h"
18 #include "AMDGPUCallLowering.h"
20 #include "AMDGPULegalizerInfo.h"
21 #include "AMDGPURegisterBankInfo.h"
22 #include "SIMachineFunctionInfo.h"
24 #include "llvm/ADT/SmallString.h"
26 #include "llvm/IR/MDBuilder.h"
28 #include <algorithm>
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "amdgpu-subtarget"
33 
34 #define GET_SUBTARGETINFO_TARGET_DESC
35 #define GET_SUBTARGETINFO_CTOR
36 #include "AMDGPUGenSubtargetInfo.inc"
37 
39 
// AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// StringRef GPU, StringRef FS): builds the effective feature string (defaults
// + OS-specific features + user FS), parses it, then patches up
// inconsistent or unspecified settings. Returns *this for chaining.
// NOTE(review): this is a doxygen-rendered scrape; the function-name line and
// several source lines were dropped by rendering (gaps in the inline
// numbering: 40-41, 60, 71, 82). Recover the missing lines from upstream
// LLVM before editing — the text below is not compilable as-is.
 42  StringRef GPU, StringRef FS) {
 43  // Determine default and user-specified characteristics
 44  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
 45  // enabled, but some instructions do not respect them and they run at the
 46  // double precision rate, so don't enable by default.
 47  //
 48  // We want to be able to turn these off, but making this a subtarget feature
 49  // for SI has the unhelpful behavior that it unsets everything else if you
 50  // disable it.
 51 
 52  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");
 53 
 54  if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
 55  FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";
 56 
 57  // FIXME: I don't think Evergreen has any useful support for
 58  // denormals, but should be checked. Should we issue a warning somewhere
 59  // if someone tries to enable these?
// NOTE(review): the guarding `if` (source line 60) is missing from this
// rendering; presumably it tests whether the target supports FP64/FP16
// denormals — confirm against upstream.
 61  FullFS += "+fp64-fp16-denormals,";
 62  } else {
 63  FullFS += "-fp32-denormals,";
 64  }
 65 
 66  FullFS += FS;
 67 
 68  ParseSubtargetFeatures(GPU, FullFS);
 69 
 70  // We don't support FP64 for EG/NI atm.
// NOTE(review): the statement that clears FP64 support (source line 71) was
// dropped by the rendering.
 72 
 73  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
 74  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
 75  // variants of MUBUF instructions.
 76  if (!hasAddr64() && !FS.contains("flat-for-global")) {
 77  FlatForGlobal = true;
 78  }
 79 
 80  // Set defaults if needed.
 81  if (MaxPrivateElementSize == 0)
// NOTE(review): the default assignment to MaxPrivateElementSize (source
// line 82) is missing from this rendering.
 83 
 84  if (LDSBankCount == 0)
 85  LDSBankCount = 32;
 86 
 87  if (TT.getArch() == Triple::amdgcn) {
 88  if (LocalMemorySize == 0)
 89  LocalMemorySize = 32768;
 90 
 91  // Do something sensible for unspecified target.
 92  if (!HasMovrel && !HasVGPRIndexMode)
 93  HasMovrel = true;
 94  }
 95 
 96  return *this;
 97 }
98 
// AMDGPUSubtarget constructor: seeds every subtarget flag with a conservative
// default (mostly false/0), then lets initializeSubtargetDependencies()
// override them from the GPU name and feature string at the end of the body.
// NOTE(review): this doxygen scrape dropped the constructor-name line and
// many initializer-list entries (gaps in the inline numbering); the member
// list below is incomplete relative to upstream.
 100  const TargetMachine &TM)
 101  : AMDGPUGenSubtargetInfo(TT, GPU, FS),
 102  TargetTriple(TT),
 103  Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
 105  WavefrontSize(0),
 106  LocalMemorySize(0),
 107  LDSBankCount(0),
 109 
 110  FastFMAF32(false),
 112 
 116  DX10Clamp(false),
 122 
 129 
 137  DumpCode(false),
 138 
 139  FP64(false),
 140  FMA(false),
 141  MIMG_R128(false),
 142  IsGCN(false),
 144  CIInsts(false),
 145  GFX9Insts(false),
 153  HasMovrel(false),
 158  HasSDWA(false),
 162  HasSDWAMac(false),
 164  HasDPP(false),
 165  HasDLInsts(false),
 173 
 175  CaymanISA(false),
 176  CFALUBug(false),
 178  TexVTXClauseSize(0),
 180 
 182  InstrItins(getInstrItineraryForCPU(GPU)) {
 183  AS = AMDGPU::getAMDGPUAS(TT);
 184  initializeSubtargetDependencies(TT, GPU, FS);
 185 }
186 
// getMaxLocalMemSizeWithWaveCount(NWaves, F): largest LDS allocation (bytes)
// that does not restrict occupancy below NWaves; a single wave may use the
// entire LDS.
// NOTE(review): the signature line (source 187) was dropped by the doxygen
// rendering.
 188  const Function &F) const {
 189  if (NWaves == 1)
 190  return getLocalMemorySize();
 191  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
 192  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
 193  unsigned MaxWaves = getMaxWavesPerEU();
 194  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
 195 }
196 
// getOccupancyWithLocalMemSize(Bytes, F): inverse of
// getMaxLocalMemSizeWithWaveCount — number of waves achievable when each
// workgroup uses `Bytes` of LDS, clamped to [1, MaxWaves].  Bytes == 0 is
// treated as 1 to avoid division by zero.
// NOTE(review): the signature line (source 197) was dropped by the doxygen
// rendering.
 198  const Function &F) const {
 199  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
 200  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
 201  unsigned MaxWaves = getMaxWavesPerEU();
 202  unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
 203  unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
 204  NumWaves = std::min(NumWaves, MaxWaves);
 205  NumWaves = std::max(NumWaves, 1u);
 206  return NumWaves;
 207 }
208 
 209 unsigned
// MachineFunction overload: forwards the LDS usage recorded in
// SIMachineFunctionInfo to the (Bytes, Function) overload above.
// NOTE(review): the signature line (source 210) was dropped by the doxygen
// rendering.
 211  const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
 212  return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
 213 }
214 
 215 std::pair<unsigned, unsigned>
// getDefaultFlatWorkGroupSize(CC): default (min, max) flat work group size by
// calling convention; the fallback is (1, 16 * wavefront size).
// NOTE(review): the doxygen rendering dropped the signature line and the
// `case` labels (source 216, 218-220, 222-227), so which calling conventions
// select each pair is not visible here — consult upstream before editing.
 217  switch (CC) {
 221  return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
 228  return std::make_pair(1, getWavefrontSize());
 229  default:
 230  return std::make_pair(1, 16 * getWavefrontSize());
 231  }
 232 }
233 
// getFlatWorkGroupSizes(F): (min, max) flat work group size for F, honoring
// the "amdgpu-flat-work-group-size" attribute (and the legacy
// "amdgpu-max-work-group-size" spelling); any invalid request falls back to
// the default range.
 234 std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
 235  const Function &F) const {
 236  // FIXME: 1024 if function.
 237  // Default minimum/maximum flat work group sizes.
 238  std::pair<unsigned, unsigned> Default =
// NOTE(review): the initializer expression (source line 239) was dropped by
// the doxygen rendering; presumably it queries the calling-convention
// default (getDefaultFlatWorkGroupSize) — confirm against upstream.
 240 
 241  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
 242  // starts using "amdgpu-flat-work-group-size" attribute.
 243  Default.second = AMDGPU::getIntegerAttribute(
 244  F, "amdgpu-max-work-group-size", Default.second);
 245  Default.first = std::min(Default.first, Default.second);
 246 
 247  // Requested minimum/maximum flat work group sizes.
 248  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
 249  F, "amdgpu-flat-work-group-size", Default);
 250 
 251  // Make sure requested minimum is less than requested maximum.
 252  if (Requested.first > Requested.second)
 253  return Default;
 254 
 255  // Make sure requested values do not violate subtarget's specifications.
 256  if (Requested.first < getMinFlatWorkGroupSize())
 257  return Default;
 258  if (Requested.second > getMaxFlatWorkGroupSize())
 259  return Default;
 260 
 261  return Requested;
 262 }
263 
264 std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
265  const Function &F) const {
266  // Default minimum/maximum number of waves per execution unit.
267  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
268 
269  // Default/requested minimum/maximum flat work group sizes.
270  std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
271 
272  // If minimum/maximum flat work group sizes were explicitly requested using
273  // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
274  // number of waves per execution unit to values implied by requested
275  // minimum/maximum flat work group sizes.
276  unsigned MinImpliedByFlatWorkGroupSize =
277  getMaxWavesPerEU(FlatWorkGroupSizes.second);
278  bool RequestedFlatWorkGroupSize = false;
279 
280  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
281  // starts using "amdgpu-flat-work-group-size" attribute.
282  if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
283  F.hasFnAttribute("amdgpu-flat-work-group-size")) {
284  Default.first = MinImpliedByFlatWorkGroupSize;
285  RequestedFlatWorkGroupSize = true;
286  }
287 
288  // Requested minimum/maximum number of waves per execution unit.
289  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
290  F, "amdgpu-waves-per-eu", Default, true);
291 
292  // Make sure requested minimum is less than requested maximum.
293  if (Requested.second && Requested.first > Requested.second)
294  return Default;
295 
296  // Make sure requested values do not violate subtarget's specifications.
297  if (Requested.first < getMinWavesPerEU() ||
298  Requested.first > getMaxWavesPerEU())
299  return Default;
300  if (Requested.second > getMaxWavesPerEU())
301  return Default;
302 
303  // Make sure requested values are compatible with values implied by requested
304  // minimum/maximum flat work group sizes.
305  if (RequestedFlatWorkGroupSize &&
306  Requested.first < MinImpliedByFlatWorkGroupSize)
307  return Default;
308 
309  return Requested;
310 }
311 
// makeLIDRangeMetadata(I): attach !range metadata to a workitem-id intrinsic
// call or a local-size query, narrowed by the kernel's reqd_work_group_size
// metadata when present.  Returns true iff metadata was attached.
// NOTE(review): the doxygen rendering dropped the signature line (source 312)
// and the LLVM_FALLTHROUGH markers (source 327, 334, 341) between each
// workitem-id case and the matching local-size case — the fallthroughs are
// intentional upstream.
 313  Function *Kernel = I->getParent()->getParent();
 314  unsigned MinSize = 0;
 315  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
 316  bool IdQuery = false;
 317 
 318  // If reqd_work_group_size is present it narrows value down.
 319  if (auto *CI = dyn_cast<CallInst>(I)) {
 320  const Function *F = CI->getCalledFunction();
 321  if (F) {
 322  unsigned Dim = UINT_MAX;
 323  switch (F->getIntrinsicID()) {
 324  case Intrinsic::amdgcn_workitem_id_x:
 325  case Intrinsic::r600_read_tidig_x:
 326  IdQuery = true;
 328  case Intrinsic::r600_read_local_size_x:
 329  Dim = 0;
 330  break;
 331  case Intrinsic::amdgcn_workitem_id_y:
 332  case Intrinsic::r600_read_tidig_y:
 333  IdQuery = true;
 335  case Intrinsic::r600_read_local_size_y:
 336  Dim = 1;
 337  break;
 338  case Intrinsic::amdgcn_workitem_id_z:
 339  case Intrinsic::r600_read_tidig_z:
 340  IdQuery = true;
 342  case Intrinsic::r600_read_local_size_z:
 343  Dim = 2;
 344  break;
 345  default:
 346  break;
 347  }
 348  if (Dim <= 3) {
 349  if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
 350  if (Node->getNumOperands() == 3)
 351  MinSize = MaxSize = mdconst::extract<ConstantInt>(
 352  Node->getOperand(Dim))->getZExtValue();
 353  }
 354  }
 355  }
 356 
 357  if (!MaxSize)
 358  return false;
 359 
 360  // Range metadata is [Lo, Hi). For ID query we need to pass max size
 361  // as Hi. For size query we need to pass Hi + 1.
 362  if (IdQuery)
 363  MinSize = 0;
 364  else
 365  ++MaxSize;
 366 
 367  MDBuilder MDB(I->getContext());
 368  MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
 369  APInt(32, MaxSize));
 370  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
 371  return true;
 372 }
373 
// R600Subtarget constructor: forwards to AMDGPUSubtarget and constructs the
// R600 instruction-info, frame-lowering and target-lowering objects.
// NOTE(review): the constructor-name line (source 374) was dropped by the
// doxygen rendering.
 375  const TargetMachine &TM) :
 376  AMDGPUSubtarget(TT, GPU, FS, TM),
 377  InstrInfo(*this),
 378  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
 379  TLInfo(TM, *this) {}
380 
// SISubtarget constructor: builds the SI instruction/frame/lowering objects,
// then the GlobalISel pipeline pieces — call lowering, legalizer,
// register-bank info, and the instruction selector (which needs the
// register-bank info it was built with).
// NOTE(review): the constructor-name line (source 381) was dropped by the
// doxygen rendering.
 382  const GCNTargetMachine &TM)
 383  : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
 384  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
 385  TLInfo(TM, *this) {
 386  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
 387  Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
 388 
 389  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
 390  InstSelector.reset(new AMDGPUInstructionSelector(
 391  *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
 392 }
393 
// SISubtarget::overrideSchedPolicy: tune the generic MachineScheduler policy
// for SI (track pressure, bidirectional scheduling, lane-mask tracking
// except under the SI scheduler).
// NOTE(review): the signature line (source 394) was dropped by the doxygen
// rendering.
 395  unsigned NumRegionInstrs) const {
 396  // Track register pressure so the scheduler can try to decrease
 397  // pressure once register usage is above the threshold defined by
 398  // SIRegisterInfo::getRegPressureSetLimit()
 399  Policy.ShouldTrackPressure = true;
 400 
 401  // Enabling both top down and bottom up scheduling seems to give us less
 402  // register spills than just using one of these approaches on its own.
 403  Policy.OnlyTopDown = false;
 404  Policy.OnlyBottomUp = false;
 405 
 406  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
 407  if (!enableSIScheduler())
 408  Policy.ShouldTrackLaneMasks = true;
 409 }
410 
413 }
414 
// getKernArgSegmentSize(F, ExplicitArgBytes): total kernarg segment size in
// bytes — the explicit args, plus (when present) implicit args placed after
// the explicit args aligned to the implicit-arg-pointer alignment; the total
// is rounded up to a multiple of 4.
// NOTE(review): the signature line (source 415) was dropped by the doxygen
// rendering.
 416  unsigned ExplicitArgBytes) const {
 417  uint64_t TotalSize = ExplicitArgBytes;
 418  unsigned ImplicitBytes = getImplicitArgNumBytes(F);
 419 
 420  if (ImplicitBytes != 0) {
 421  unsigned Alignment = getAlignmentForImplicitArgPtr();
 422  TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
 423  }
 424 
 425  // Being able to dereference past the end is useful for emitting scalar loads.
 426  return alignTo(TotalSize, 4);
 427 }
428 
 429 unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
// Step function mapping an SGPR budget to the maximum achievable number of
// waves per SIMD.
// NOTE(review): the doxygen rendering dropped the condition (source line 430)
// that opens the first brace-delimited table — presumably a hardware
// generation check selecting the first table over the second. Confirm
// against upstream before editing.
 431  if (SGPRs <= 80)
 432  return 10;
 433  if (SGPRs <= 88)
 434  return 9;
 435  if (SGPRs <= 100)
 436  return 8;
 437  return 7;
 438  }
 439  if (SGPRs <= 48)
 440  return 10;
 441  if (SGPRs <= 56)
 442  return 9;
 443  if (SGPRs <= 64)
 444  return 8;
 445  if (SGPRs <= 72)
 446  return 7;
 447  if (SGPRs <= 80)
 448  return 6;
 449  return 5;
 450 }
451 
452 unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
453  if (VGPRs <= 24)
454  return 10;
455  if (VGPRs <= 28)
456  return 9;
457  if (VGPRs <= 32)
458  return 8;
459  if (VGPRs <= 36)
460  return 7;
461  if (VGPRs <= 40)
462  return 6;
463  if (VGPRs <= 48)
464  return 5;
465  if (VGPRs <= 64)
466  return 4;
467  if (VGPRs <= 84)
468  return 3;
469  if (VGPRs <= 128)
470  return 2;
471  return 1;
472 }
473 
// getReservedNumSGPRs(MF): number of SGPRs set aside for FLAT_SCRATCH, XNACK
// and VCC depending on which features the function/subtarget enables.
// NOTE(review): the doxygen rendering dropped the signature (source 474-475)
// and the condition (source 477/479) splitting the 6-vs-4 cases inside the
// flat-scratch branch — presumably an XNACK check, per the comments below.
 476  if (MFI.hasFlatScratchInit()) {
 478  return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
 480  return 4; // FLAT_SCRATCH, VCC (in that order).
 481  }
 482 
 483  if (isXNACKEnabled())
 484  return 4; // XNACK, VCC (in that order).
 485  return 2; // VCC.
 486 }
487 
// getMaxNumSGPRs(MF): the SGPR budget for MF — the waves/EU-implied maximum,
// optionally overridden by the "amdgpu-num-sgpr" attribute (rejecting
// invalid requests), minus the reserved SGPRs, clamped to the addressable
// maximum.
// NOTE(review): the doxygen rendering dropped the MFI declaration (source
// line 490, presumably a SIMachineFunctionInfo lookup) and the clamp applied
// under hasSGPRInitBug() (source line 532).
 488 unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
 489  const Function &F = MF.getFunction();
 491 
 492  // Compute maximum number of SGPRs function can use using default/requested
 493  // minimum number of waves per execution unit.
 494  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
 495  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
 496  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);
 497 
 498  // Check if maximum number of SGPRs was explicitly requested using
 499  // "amdgpu-num-sgpr" attribute.
 500  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
 501  unsigned Requested = AMDGPU::getIntegerAttribute(
 502  F, "amdgpu-num-sgpr", MaxNumSGPRs);
 503 
 504  // Make sure requested value does not violate subtarget's specifications.
 505  if (Requested && (Requested <= getReservedNumSGPRs(MF)))
 506  Requested = 0;
 507 
 508  // If more SGPRs are required to support the input user/system SGPRs,
 509  // increase to accommodate them.
 510  //
 511  // FIXME: This really ends up using the requested number of SGPRs + number
 512  // of reserved special registers in total. Theoretically you could re-use
 513  // the last input registers for these special registers, but this would
 514  // require a lot of complexity to deal with the weird aliasing.
 515  unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
 516  if (Requested && Requested < InputNumSGPRs)
 517  Requested = InputNumSGPRs;
 518 
 519  // Make sure requested value is compatible with values implied by
 520  // default/requested minimum/maximum number of waves per execution unit.
 521  if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
 522  Requested = 0;
 523  if (WavesPerEU.second &&
 524  Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
 525  Requested = 0;
 526 
 527  if (Requested)
 528  MaxNumSGPRs = Requested;
 529  }
 530 
 531  if (hasSGPRInitBug())
// NOTE(review): the statement applied under the SGPR-init-bug workaround
// (source line 532) is missing from this rendering.
 533 
 534  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
 535  MaxAddressableNumSGPRs);
 536 }
537 
// getMaxNumVGPRs(MF): the VGPR budget for MF — the waves/EU-implied maximum,
// optionally overridden by the "amdgpu-num-vgpr" attribute (rejecting
// invalid requests), minus the reserved VGPRs.
// NOTE(review): the doxygen rendering dropped the MFI declaration (source
// line 540, presumably a SIMachineFunctionInfo lookup).
 538 unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
 539  const Function &F = MF.getFunction();
 541 
 542  // Compute maximum number of VGPRs function can use using default/requested
 543  // minimum number of waves per execution unit.
 544  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
 545  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
 546 
 547  // Check if maximum number of VGPRs was explicitly requested using
 548  // "amdgpu-num-vgpr" attribute.
 549  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
 550  unsigned Requested = AMDGPU::getIntegerAttribute(
 551  F, "amdgpu-num-vgpr", MaxNumVGPRs);
 552 
 553  // Make sure requested value does not violate subtarget's specifications.
 554  if (Requested && Requested <= getReservedNumVGPRs(MF))
 555  Requested = 0;
 556 
 557  // Make sure requested value is compatible with values implied by
 558  // default/requested minimum/maximum number of waves per execution unit.
 559  if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
 560  Requested = 0;
 561  if (WavesPerEU.second &&
 562  Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
 563  Requested = 0;
 564 
 565  if (Requested)
 566  MaxNumVGPRs = Requested;
 567  }
 568 
 569  return MaxNumVGPRs - getReservedNumVGPRs(MF);
 570 }
571 
 572 namespace {
// DAG mutation that clusters adjacent memory operations of the same kind
// (VMEM/FLAT/SMRD/DS) by adding artificial edges, so the scheduler cannot
// pull them apart. Registered from SISubtarget::getPostRAMutations().
 573 struct MemOpClusterMutation : ScheduleDAGMutation {
 574  const SIInstrInfo *TII;
 575 
 576  MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}
 577 
 578  void apply(ScheduleDAGInstrs *DAGInstrs) override {
 579  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
 580 
 581  SUnit *SUa = nullptr;
 582  // Search for two consequent memory operations and link them
 583  // to prevent scheduler from moving them apart.
 584  // In DAG pre-process SUnits are in the original order of
 585  // the instructions before scheduling.
 586  for (SUnit &SU : DAG->SUnits) {
 587  MachineInstr &MI2 = *SU.getInstr();
 588  if (!MI2.mayLoad() && !MI2.mayStore()) {
// A non-memory instruction breaks the current cluster.
 589  SUa = nullptr;
 590  continue;
 591  }
 592  if (!SUa) {
 593  SUa = &SU;
 594  continue;
 595  }
 596 
 597  MachineInstr &MI1 = *SUa->getInstr();
// Only cluster when both instructions are the same kind of memory op.
 598  if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
 599  (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
 600  (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
 601  (TII->isDS(MI1) && TII->isDS(MI2))) {
 602  SU.addPredBarrier(SUa);
 603 
// Route every other predecessor of SU through SUa, and every successor of
// SUa through SU, so nothing can be scheduled between the pair.
 604  for (const SDep &SI : SU.Preds) {
 605  if (SI.getSUnit() != SUa)
 606  SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
 607  }
 608 
 609  if (&SU != &DAG->ExitSU) {
 610  for (const SDep &SI : SUa->Succs) {
 611  if (SI.getSUnit() != &SU)
 612  SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
 613  }
 614  }
 615  }
 616 
 617  SUa = &SU;
 618  }
 619  }
 620 };
 621 } // namespace
622 
// SISubtarget::getPostRAMutations: register the memory-op clustering DAG
// mutation (MemOpClusterMutation above) with the post-RA scheduler.
// NOTE(review): the signature line (source 623) was dropped by the doxygen
// rendering.
 624  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
 625  Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
 626 }
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool isVGPRSpillingEnabled(const Function &F) const
Generation getGeneration() const
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
AMDGPU specific subclass of TargetSubtarget.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:448
unsigned getImplicitArgNumBytes(const Function &F) const
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
This file describes how to lower LLVM calls to machine code calls.
AMDGPUSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
AMDGPUAS getAMDGPUAS(const Module &M)
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:221
SISubtarget(const Triple &TT, StringRef CPU, StringRef FS, const GCNTargetMachine &TM)
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:713
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:307
Mutate the DAG as a postpass after normal DAG building.
Metadata node.
Definition: Metadata.h:862
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Align.
Definition: MathExtras.h:677
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation >> &Mutations) const override
unsigned getStackAlignment() const
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
static bool isSMRD(const MachineInstr &MI)
Definition: SIInstrInfo.h:414
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:261
std::pair< int, int > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< int, int > Default, bool OnlyFirstRequired)
static bool isDS(const MachineInstr &MI)
Definition: SIInstrInfo.h:424
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:198
static bool isFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:456
This file declares the targeting of the InstructionSelector class for AMDGPU.
const HexagonInstrInfo * TII
unsigned getMinFlatWorkGroupSize() const
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:189
int getLocalMemorySize() const
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Metadata.cpp:1444
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1173
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:283
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:490
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
void ParseSubtargetFeatures(StringRef CPU, StringRef FS)
Scheduling dependency.
Definition: ScheduleDAG.h:50
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:672
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:378
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM)
bool ShouldTrackLaneMasks
Track LaneMasks to allow reordering of independent subregister writes of the same vreg...
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
std::pair< unsigned, unsigned > getWavesPerEU() const
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:216
The AMDGPU TargetMachine interface definition for hw codgen targets.
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1226
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
unsigned getWavefrontSize() const
const SIRegisterInfo * getRegisterInfo() const override
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:199
unsigned getReservedNumVGPRs(const MachineFunction &MF) const
Information about stack frame layout on the target.
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store fol...
Definition: ScheduleDAG.h:389
unsigned getAlignmentForImplicitArgPtr() const
This class provides the information for the target register banks.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:180
const Function & getFunction() const
Return the LLVM function that this machine code represents.
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
Class for arbitrary precision integers.
Definition: APInt.h:69
bool isShader(CallingConv::ID cc)
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
unsigned getMaxWavesPerEU() const
Provides AMDGPU specific target descriptions.
A ScheduleDAG for scheduling lists of MachineInstr.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Representation of each machine instruction.
Definition: MachineInstr.h:60
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:574
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getMinWavesPerEU() const
unsigned getMaxFlatWorkGroupSize() const
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isVMEM(const MachineInstr &MI)
Definition: SIInstrInfo.h:310
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool enableSIScheduler() const
~AMDGPUSubtarget() override
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM)
const SITargetLowering * getTargetLowering() const override
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:659
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
bool hasSGPRInitBug() const
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:262
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
InstrItineraryData InstrItins
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:572
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool isXNACKEnabled() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247
unsigned getKernArgSegmentSize(const Function &F, unsigned ExplictArgBytes) const
const BasicBlock * getParent() const
Definition: Instruction.h:67