AMDGPUSubtarget.cpp (LLVM 6.0.0svn)
//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUSubtarget.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Target/TargetFrameLowering.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"

AMDGPUSubtarget::~AMDGPUSubtarget() = default;

AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                 StringRef GPU, StringRef FS) {
  // Determine default and user-specified characteristics.
  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
  // enabled, but some instructions do not respect them and they run at the
  // double precision rate, so don't enable by default.
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.

  SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,");
  if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
    FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";

  FullFS += FS;

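  // For illustration (hypothetical inputs, not from the source): with a user
  // feature string FS = "-promote-alloca" on amdhsa, FullFS becomes
  //   "+promote-alloca,...,+trap-handler,-promote-alloca"
  // Appending FS last matters: when a feature appears more than once in a
  // subtarget feature string, the later entry wins, so the user's
  // "-promote-alloca" overrides the default "+promote-alloca".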
  ParseSubtargetFeatures(GPU, FullFS);

  // Unless +flat-for-global or -flat-for-global is explicitly specified, turn
  // on FlatForGlobal for all OSes on VI and newer hardware to avoid assertion
  // failures due to missing ADDR64 variants of MUBUF instructions.
  if (!hasAddr64() && !FS.contains("flat-for-global")) {
    FlatForGlobal = true;
  }

  // FIXME: I don't think Evergreen has any useful support for denormals, but
  // this should be checked. Should we issue a warning somewhere if someone
  // tries to enable these?
  if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
    FP64FP16Denormals = false;
    FP32Denormals = false;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn) {
    if (LocalMemorySize == 0)
      LocalMemorySize = 32768;

    // Do something sensible for unspecified target.
    if (!HasMovrel && !HasVGPRIndexMode)
      HasMovrel = true;
  }

  return *this;
}

AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                                 const TargetMachine &TM)
  : AMDGPUGenSubtargetInfo(TT, GPU, FS),
    TargetTriple(TT),
    Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
    WavefrontSize(0),
    LocalMemorySize(0),
    LDSBankCount(0),
    // (a number of additional member initializers, here and between the
    // entries below, are missing from this listing)
    FastFMAF32(false),
    DX10Clamp(false),
    DumpCode(false),
    FP64(false),
    IsGCN(false),
    CIInsts(false),
    GFX9Insts(false),
    HasMovrel(false),
    HasSDWA(false),
    HasSDWAMac(false),
    HasDPP(false),
    CaymanISA(false),
    CFALUBug(false),
    TexVTXClauseSize(0),
    InstrItins(getInstrItineraryForCPU(GPU)) {
  AS = AMDGPU::getAMDGPUAS(TT);
  initializeSubtargetDependencies(TT, GPU, FS);
}

unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
                                                          const Function &F) const {
  if (NWaves == 1)
    return getLocalMemorySize();
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}
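// A worked example under assumed values (not from the source): with
// LocalMemorySize == 65536, MaxWaves == 10, WorkGroupsPerCu == 2, and a
// kernel that must sustain NWaves == 5, the LDS budget that keeps occupancy
// at 5 waves is 65536 * 10 / 2 / 5 == 65536 bytes.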

unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
                                                       const Function &F) const {
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
  unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
  NumWaves = std::min(NumWaves, MaxWaves);
  NumWaves = std::max(NumWaves, 1u);
  return NumWaves;
}
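// Continuing the assumed numbers above: Limit == 65536 * 10 / 2 == 327680, so
// a kernel using Bytes == 40000 of LDS gets NumWaves == 327680 / 40000 == 8,
// which already lies within [1, MaxWaves]. The two clamps only fire for very
// small or very large LDS footprints.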

std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
    return std::make_pair(1, getWavefrontSize());
  default:
    return std::make_pair(1, 16 * getWavefrontSize());
  }
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
  const Function &F) const {
  // FIXME: 1024 if function.
  // Default minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Default =
    getDefaultFlatWorkGroupSize(F.getCallingConv());

  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
  // starts using "amdgpu-flat-work-group-size" attribute.
  Default.second = AMDGPU::getIntegerAttribute(
    F, "amdgpu-max-work-group-size", Default.second);
  Default.first = std::min(Default.first, Default.second);

  // Requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-flat-work-group-size", Default);

  // Make sure the requested minimum does not exceed the requested maximum.
  if (Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
  if (Requested.first < getMinFlatWorkGroupSize())
    return Default;
  if (Requested.second > getMaxFlatWorkGroupSize())
    return Default;

  return Requested;
}
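// For illustration (attribute values are hypothetical): a kernel annotated
// with "amdgpu-flat-work-group-size"="128,256" yields Requested == {128, 256};
// since 128 <= 256 and both bounds respect the subtarget's min/max flat work
// group sizes, {128, 256} is returned. A malformed request such as "256,128"
// falls back to Default rather than being swapped.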

std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
  const Function &F) const {
  // Default minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());

  // Default/requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);

  // If minimum/maximum flat work group sizes were explicitly requested using
  // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
  // number of waves per execution unit to values implied by requested
  // minimum/maximum flat work group sizes.
  unsigned MinImpliedByFlatWorkGroupSize =
    getMaxWavesPerEU(FlatWorkGroupSizes.second);
  bool RequestedFlatWorkGroupSize = false;

  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
  // starts using "amdgpu-flat-work-group-size" attribute.
  if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
      F.hasFnAttribute("amdgpu-flat-work-group-size")) {
    Default.first = MinImpliedByFlatWorkGroupSize;
    RequestedFlatWorkGroupSize = true;
  }

  // Requested minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-waves-per-eu", Default, true);

  // Make sure the requested minimum does not exceed the requested maximum.
  if (Requested.second && Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
  if (Requested.first < getMinWavesPerEU() ||
      Requested.first > getMaxWavesPerEU())
    return Default;
  if (Requested.second > getMaxWavesPerEU())
    return Default;

  // Make sure requested values are compatible with values implied by requested
  // minimum/maximum flat work group sizes.
  if (RequestedFlatWorkGroupSize &&
      Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default;

  return Requested;
}
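// For illustration (hypothetical attribute): "amdgpu-waves-per-eu"="2,4"
// yields Requested == {2, 4}. With Default == {1, getMaxWavesPerEU()} and no
// flat work group size request, the bounds checks pass as long as 2 is at
// least getMinWavesPerEU() and 4 at most getMaxWavesPerEU(), so {2, 4} is
// returned; getMaxNumSGPRs/getMaxNumVGPRs below then use it to bound register
// budgets per wave.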

bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
  bool IdQuery = false;

  // If reqd_work_group_size is present it narrows value down.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    const Function *F = CI->getCalledFunction();
    if (F) {
      unsigned Dim = UINT_MAX;
      switch (F->getIntrinsicID()) {
      case Intrinsic::amdgcn_workitem_id_x:
      case Intrinsic::r600_read_tidig_x:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_x:
        Dim = 0;
        break;
      case Intrinsic::amdgcn_workitem_id_y:
      case Intrinsic::r600_read_tidig_y:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_y:
        Dim = 1;
        break;
      case Intrinsic::amdgcn_workitem_id_z:
      case Intrinsic::r600_read_tidig_z:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_z:
        Dim = 2;
        break;
      default:
        break;
      }
      if (Dim <= 3) {
        if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
          if (Node->getNumOperands() == 3)
            MinSize = MaxSize = mdconst::extract<ConstantInt>(
              Node->getOperand(Dim))->getZExtValue();
      }
    }
  }

  if (!MaxSize)
    return false;

  // Range metadata is [Lo, Hi). For ID query we need to pass max size
  // as Hi. For size query we need to pass Hi + 1.
  if (IdQuery)
    MinSize = 0;
  else
    ++MaxSize;

  MDBuilder MDB(I->getContext());
  MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
                                                  APInt(32, MaxSize));
  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
  return true;
}
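// For example, assuming a kernel with !reqd_work_group_size = {64, 1, 1}: a
// workitem.id.x query gets !range [0, 64) (IDs run 0..63), while a
// local.size.x query gets !range [64, 65), i.e. exactly 64, because range
// metadata excludes its upper bound.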

R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                             const TargetMachine &TM) :
  AMDGPUSubtarget(TT, GPU, FS, TM),
  InstrInfo(*this),
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
  TLInfo(TM, *this) {}

SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                         const TargetMachine &TM)
  : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
    TLInfo(TM, *this) {
  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
  Legalizer.reset(new AMDGPULegalizerInfo());

  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
  InstSelector.reset(new AMDGPUInstructionSelector(
    *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get())));
}

void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                      unsigned NumRegionInstrs) const {
  // Track register pressure so the scheduler can try to decrease
  // pressure once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit().
  Policy.ShouldTrackPressure = true;

  // Enabling both top down and bottom up scheduling seems to give us fewer
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

bool SISubtarget::isVGPRSpillingEnabled(const Function &F) const {
  return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}

unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
                                            unsigned ExplicitArgBytes) const {
  unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
  if (ImplicitBytes == 0)
    return ExplicitArgBytes;

  unsigned Alignment = getAlignmentForImplicitArgPtr();
  return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
}
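// A worked example with assumed numbers: if ExplicitArgBytes == 36,
// Alignment == 8, and ImplicitBytes == 56, then alignTo(36, 8) == 40 and the
// kernarg segment occupies 40 + 56 == 96 bytes, so the implicit arguments
// start at an aligned offset directly after the padded explicit ones.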

unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
  if (VGPRs <= 24)
    return 10;
  if (VGPRs <= 28)
    return 9;
  if (VGPRs <= 32)
    return 8;
  if (VGPRs <= 36)
    return 7;
  if (VGPRs <= 40)
    return 6;
  if (VGPRs <= 48)
    return 5;
  if (VGPRs <= 64)
    return 4;
  if (VGPRs <= 84)
    return 3;
  if (VGPRs <= 128)
    return 2;
  return 1;
}
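// Reading the table above: a kernel using 33 VGPRs falls into the "<= 36"
// bucket and so runs at most 7 waves per SIMD, while trimming it to 32 VGPRs
// would raise the cap to 8. Each step reflects how many wavefronts' register
// allocations fit into the fixed per-SIMD VGPR file (256 registers on these
// targets; e.g. 256 / 36 rounds down to 7).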

unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  if (MFI.hasFlatScratchInit()) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of SGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if maximum number of SGPRs was explicitly requested using
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && (Requested <= getReservedNumSGPRs(MF)))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
                  MaxAddressableNumSGPRs);
}
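// Tying the pieces together with assumed numbers: on a VI-class target where
// the wave-derived budget at WavesPerEU.first == 1 might be 102 SGPRs (also
// the addressable limit) and 6 SGPRs are reserved (FLAT_SCRATCH, XNACK, VCC),
// the function would get std::min(102 - 6, 102) == 96 SGPRs unless an
// "amdgpu-num-sgpr" attribute narrows it further.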

unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of VGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if maximum number of VGPRs was explicitly requested using
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-vgpr", MaxNumVGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && Requested <= getReservedNumVGPRs(MF))
      Requested = 0;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs - getReservedNumVGPRs(MF);
}

namespace {
struct MemOpClusterMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

    SUnit *SUa = nullptr;
    // Search for two consecutive memory operations and link them
    // to prevent the scheduler from moving them apart.
    // In DAG pre-processing, SUnits are in the original order of
    // the instructions before scheduling.
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MI2 = *SU.getInstr();
      if (!MI2.mayLoad() && !MI2.mayStore()) {
        SUa = nullptr;
        continue;
      }
      if (!SUa) {
        SUa = &SU;
        continue;
      }

      MachineInstr &MI1 = *SUa->getInstr();
      if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
          (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
          (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
          (TII->isDS(MI1) && TII->isDS(MI2))) {
        SU.addPredBarrier(SUa);

        for (const SDep &SI : SU.Preds) {
          if (SI.getSUnit() != SUa)
            SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
        }

        if (&SU != &DAG->ExitSU) {
          for (const SDep &SI : SUa->Succs) {
            if (SI.getSUnit() != &SU)
              SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
          }
        }
      }

      SUa = &SU;
    }
  }
};
} // namespace
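// Sketch of the effect, assuming two adjacent DS loads A then B: B gets a
// barrier edge on A, every other predecessor of B also becomes an artificial
// predecessor of A, and every other successor of A becomes an artificial
// successor of B, so the pair moves through scheduling as a single unit.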

void SISubtarget::getPostRAMutations(
  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}