//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUSubtarget.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-subtarget"

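// The AMDGPU TableGen files emit their generated code under the target's
// default class name (AMDGPUSubtarget); the #define below redirects that
// onto GCNSubtarget, and the #undef restores the name before the R600
// tables, which already use their own R600Subtarget name, are included.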
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenSubtargetInfo.inc"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#undef AMDGPUSubtarget
#include "R600GenSubtargetInfo.inc"

GCNSubtarget::~GCNSubtarget() = default;

R600Subtarget &
R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
                                               StringRef GPU, StringRef FS) {
  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,");
  FullFS += FS;
  ParseSubtargetFeatures(GPU, FullFS);

  // FIXME: I don't think Evergreen has any useful support for denormals,
  // but that should be checked. Should we issue a warning somewhere if
  // someone tries to enable these?
  if (getGeneration() <= R600Subtarget::NORTHERN_ISLANDS) {
    FP32Denormals = false;
  }

  HasMulU24 = getGeneration() >= EVERGREEN;
  HasMulI24 = hasCaymanISA();

  return *this;
}
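// Note on feature-string composition (the R600 initializer above and the
// GCN one below): the built-in defaults are prepended and the user string
// FS is appended, and when a feature appears more than once the later entry
// wins. For example (illustrative), FS = "-promote-alloca" produces
// "+promote-alloca,+dx10-clamp,-promote-alloca", leaving promote-alloca
// disabled.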

GCNSubtarget &
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                              StringRef GPU, StringRef FS) {
  // Determine default and user-specified characteristics.
  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
  // enabled, but some instructions do not respect them and they run at the
  // double precision rate, so don't enable by default.
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.

  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");

  if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
    FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";

  // FIXME: I don't think Evergreen has any useful support for denormals,
  // but that should be checked. Should we issue a warning somewhere if
  // someone tries to enable these?
  if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
    FullFS += "+fp64-fp16-denormals,";
  } else {
    FullFS += "-fp32-denormals,";
  }

  FullFS += FS;

  ParseSubtargetFeatures(GPU, FullFS);

  // We don't support FP64 for EG/NI atm.
  assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));

  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
  // variants of MUBUF instructions.
  if (!hasAddr64() && !FS.contains("flat-for-global")) {
    FlatForGlobal = true;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn) {
    if (LocalMemorySize == 0)
      LocalMemorySize = 32768;

    // Do something sensible for unspecified target.
    if (!HasMovrel && !HasVGPRIndexMode)
      HasMovrel = true;
  }

  HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;

  return *this;
}

AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
  TargetTriple(TT),
  Has16BitInsts(false),
  HasMadMixInsts(false),
  FP32Denormals(false),
  FPExceptions(false),
  HasSDWA(false),
  HasVOP3PInsts(false),
  HasMulI24(true),
  HasMulU24(true),
  HasInv2PiInlineImm(false),
  HasFminFmaxLegacy(true),
  EnablePromoteAlloca(false),
  HasTrigReducedRange(false),
  LocalMemorySize(0),
  WavefrontSize(0)
  { }

GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                           const GCNTargetMachine &TM) :
    AMDGPUGenSubtargetInfo(TT, GPU, FS),
    AMDGPUSubtarget(TT),
    TargetTriple(TT),
    Gen(SOUTHERN_ISLANDS),
    IsaVersion(ISAVersion0_0_0),
    InstrItins(getInstrItineraryForCPU(GPU)),
    LDSBankCount(0),
    MaxPrivateElementSize(0),

    FastFMAF32(false),
    HalfRate64Ops(false),

    FP64FP16Denormals(false),
    DX10Clamp(false),
    FlatForGlobal(false),
    AutoWaitcntBeforeBarrier(false),
    CodeObjectV3(false),
    UnalignedScratchAccess(false),
    UnalignedBufferAccess(false),

    HasApertureRegs(false),
    EnableXNACK(false),
    TrapHandler(false),
    DebuggerInsertNops(false),
    DebuggerEmitPrologue(false),

    EnableHugePrivateBuffer(false),
    EnableVGPRSpilling(false),
    EnableLoadStoreOpt(false),
    EnableUnsafeDSOffsetFolding(false),
    EnableSIScheduler(false),
    EnableDS128(false),
    DumpCode(false),

    FP64(false),
    GCN3Encoding(false),
    CIInsts(false),
    VIInsts(false),
    GFX9Insts(false),
    SGPRInitBug(false),
    HasSMemRealTime(false),
    HasIntClamp(false),
    HasFmaMixInsts(false),
    HasMovrel(false),
    HasVGPRIndexMode(false),
    HasScalarStores(false),
    HasScalarAtomics(false),
    HasSDWAOmod(false),
    HasSDWAScalar(false),
    HasSDWASdst(false),
    HasSDWAMac(false),
    HasSDWAOutModsVOPC(false),
    HasDPP(false),
    HasR128A16(false),
    HasDLInsts(false),
    D16PreservesUnusedBits(false),
    FlatAddressSpace(false),
    FlatInstOffsets(false),
    FlatGlobalInsts(false),
    FlatScratchInsts(false),
    AddNoCarryInsts(false),
    HasUnpackedD16VMem(false),

    ScalarizeGlobal(false),

    FeatureDisable(false),
    InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
    TLInfo(TM, *this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
  Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
  InstSelector.reset(new AMDGPUInstructionSelector(
  *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}

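// Illustrative example (hypothetical per-CU numbers): with the default 32768
// bytes of LDS, getMaxWavesPerEU() == 10 and getMaxWorkGroupsPerCU() == 8,
// asking for NWaves == 5 yields 32768 * 10 / 8 / 5 = 8192 bytes of LDS per
// work group.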
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
  const Function &F) const {
  if (NWaves == 1)
    return getLocalMemorySize();
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}

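// Illustrative example (hypothetical numbers): with 32768 bytes of LDS,
// MaxWaves == 10 and WorkGroupsPerCu == 4, Limit == 81920, so a kernel
// using 16384 bytes of LDS gets 81920 / 16384 = 5 waves, already inside
// the clamp range [1, 10].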
unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
  const Function &F) const {
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
  unsigned MaxWaves = getMaxWavesPerEU();
  unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
  unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
  NumWaves = std::min(NumWaves, MaxWaves);
  NumWaves = std::max(NumWaves, 1u);
  return NumWaves;
}

unsigned
AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
  const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
  return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
}

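// With the common wavefront size of 64 this gives kernels a default flat
// work group size range of (128, 256), shader stages (1, 64), and all other
// calling conventions (1, 1024).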
std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
    return std::make_pair(1, getWavefrontSize());
  default:
    return std::make_pair(1, 16 * getWavefrontSize());
  }
}

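// The attributes read below are "min,max" integer pairs, e.g. in IR
// (illustrative): attributes #0 = { "amdgpu-flat-work-group-size"="64,256" }.
// Such a request is honored only if it passes the validity checks at the
// end of this function.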
std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
  const Function &F) const {
  // FIXME: 1024 if function.
  // Default minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Default =
    getDefaultFlatWorkGroupSize(F.getCallingConv());

  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
  // starts using "amdgpu-flat-work-group-size" attribute.
  Default.second = AMDGPU::getIntegerAttribute(
    F, "amdgpu-max-work-group-size", Default.second);
  Default.first = std::min(Default.first, Default.second);

  // Requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-flat-work-group-size", Default);

  // Make sure requested minimum is less than requested maximum.
  if (Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
  if (Requested.first < getMinFlatWorkGroupSize())
    return Default;
  if (Requested.second > getMaxFlatWorkGroupSize())
    return Default;

  return Requested;
}

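// Illustrative example: "amdgpu-waves-per-eu"="4" requests at least 4 waves
// per EU; the maximum may be omitted (the trailing 'true' argument below
// makes only the first value required) and then falls back to the default.
// Any request that fails the checks below is ignored in favor of Default.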
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
  const Function &F) const {
  // Default minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());

  // Default/requested minimum/maximum flat work group sizes.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);

  // If minimum/maximum flat work group sizes were explicitly requested using
  // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
  // number of waves per execution unit to values implied by requested
  // minimum/maximum flat work group sizes.
  unsigned MinImpliedByFlatWorkGroupSize =
    getMaxWavesPerEU(FlatWorkGroupSizes.second);
  bool RequestedFlatWorkGroupSize = false;

  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
  // starts using "amdgpu-flat-work-group-size" attribute.
  if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
      F.hasFnAttribute("amdgpu-flat-work-group-size")) {
    Default.first = MinImpliedByFlatWorkGroupSize;
    RequestedFlatWorkGroupSize = true;
  }

  // Requested minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
    F, "amdgpu-waves-per-eu", Default, true);

  // Make sure requested minimum is less than requested maximum.
  if (Requested.second && Requested.first > Requested.second)
    return Default;

  // Make sure requested values do not violate subtarget's specifications.
  if (Requested.first < getMinWavesPerEU() ||
      Requested.first > getMaxWavesPerEU())
    return Default;
  if (Requested.second > getMaxWavesPerEU())
    return Default;

  // Make sure requested values are compatible with values implied by requested
  // minimum/maximum flat work group sizes.
  if (RequestedFlatWorkGroupSize &&
      Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default;

  return Requested;
}

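// Illustrative example: for a call to llvm.amdgcn.workitem.id.x in a kernel
// with !reqd_work_group_size metadata of {64, 1, 1}, this attaches
// !range !{i32 0, i32 64}, since IDs run from 0 to size - 1. A local-size
// query on the same kernel instead gets the half-open range [64, 65),
// i.e. exactly 64.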
bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
  bool IdQuery = false;

  // If reqd_work_group_size is present it narrows value down.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    const Function *F = CI->getCalledFunction();
    if (F) {
      unsigned Dim = UINT_MAX;
      switch (F->getIntrinsicID()) {
      case Intrinsic::amdgcn_workitem_id_x:
      case Intrinsic::r600_read_tidig_x:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_x:
        Dim = 0;
        break;
      case Intrinsic::amdgcn_workitem_id_y:
      case Intrinsic::r600_read_tidig_y:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_y:
        Dim = 1;
        break;
      case Intrinsic::amdgcn_workitem_id_z:
      case Intrinsic::r600_read_tidig_z:
        IdQuery = true;
        LLVM_FALLTHROUGH;
      case Intrinsic::r600_read_local_size_z:
        Dim = 2;
        break;
      default:
        break;
      }
      if (Dim <= 3) {
        if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
          if (Node->getNumOperands() == 3)
            MinSize = MaxSize = mdconst::extract<ConstantInt>(
                                  Node->getOperand(Dim))->getZExtValue();
      }
    }
  }

  if (!MaxSize)
    return false;

  // Range metadata is [Lo, Hi). For ID query we need to pass max size
  // as Hi. For size query we need to pass Hi + 1.
  if (IdQuery)
    MinSize = 0;
  else
    ++MaxSize;

  MDBuilder MDB(I->getContext());
  MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
                                                  APInt(32, MaxSize));
  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
  return true;
}

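// Illustrative example: for a kernel taking (i32, double), the i32 occupies
// bytes [0, 4), the double is aligned up to offset 8 and occupies [8, 16),
// so this returns 16 with MaxAlign == 8.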
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
                                                 unsigned &MaxAlign) const {
  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
         F.getCallingConv() == CallingConv::SPIR_KERNEL);

  const DataLayout &DL = F.getParent()->getDataLayout();
  uint64_t ExplicitArgBytes = 0;
  MaxAlign = 1;

  for (const Argument &Arg : F.args()) {
    Type *ArgTy = Arg.getType();

    unsigned Align = DL.getABITypeAlignment(ArgTy);
    uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
    ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize;
    MaxAlign = std::max(MaxAlign, Align);
  }

  return ExplicitArgBytes;
}

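// Illustrative example (hypothetical offsets): with 16 bytes of explicit
// arguments, an explicit-argument offset of 36 and no implicit arguments,
// TotalSize == 52, which is already 4-byte aligned and is returned as-is.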
unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
                                                unsigned &MaxAlign) const {
  uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);

  unsigned ExplicitOffset = getExplicitKernelArgOffset(F);

  uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
  unsigned ImplicitBytes = getImplicitArgNumBytes(F);
  if (ImplicitBytes != 0) {
    unsigned Alignment = getAlignmentForImplicitArgPtr();
    TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
  }

  // Being able to dereference past the end is useful for emitting scalar loads.
  return alignTo(TotalSize, 4);
}

R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                             const TargetMachine &TM) :
  R600GenSubtargetInfo(TT, GPU, FS),
  AMDGPUSubtarget(TT),
  InstrInfo(*this),
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
  FMA(false),
  CaymanISA(false),
  CFALUBug(false),
  DX10Clamp(false),
  HasVertexCache(false),
  R600ALUInst(false),
  FP64(false),
  TexVTXClauseSize(0),
  Gen(R600),
  TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
  InstrItins(getInstrItineraryForCPU(GPU)) { }

void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                       unsigned NumRegionInstrs) const {
  // Track register pressure so the scheduler can try to decrease
  // pressure once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit().
  Policy.ShouldTrackPressure = true;

  // Enabling both top down and bottom up scheduling seems to give us less
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

bool GCNSubtarget::isVGPRSpillingEnabled(const Function &F) const {
  return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}

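// Worked example from the tables below: a kernel using 96 SGPRs runs at
// occupancy 8 on VI and newer (96 <= 100) but only 5 on SI/CI (96 > 80).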
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

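// Worked example: 65 VGPRs falls into the "<= 84" bucket below, limiting
// such a kernel to 3 waves per SIMD.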
unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
  if (VGPRs <= 24)
    return 10;
  if (VGPRs <= 28)
    return 9;
  if (VGPRs <= 32)
    return 8;
  if (VGPRs <= 36)
    return 7;
  if (VGPRs <= 40)
    return 6;
  if (VGPRs <= 48)
    return 5;
  if (VGPRs <= 64)
    return 4;
  if (VGPRs <= 84)
    return 3;
  if (VGPRs <= 128)
    return 2;
  return 1;
}

unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  if (MFI.hasFlatScratchInit()) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

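// Illustrative flow: if a kernel carries "amdgpu-num-sgpr"="40" but preloads
// 48 user/system SGPRs, Requested is raised to 48 below before being checked
// against the wave-count limits.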
unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of SGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if maximum number of SGPRs was explicitly requested using
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && (Requested <= getReservedNumSGPRs(MF)))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
                  MaxAddressableNumSGPRs);
}

unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of VGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if maximum number of VGPRs was explicitly requested using
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-vgpr", MaxNumVGPRs);

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs;
}

namespace {
struct MemOpClusterMutation : ScheduleDAGMutation {
  const SIInstrInfo *TII;

  MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}

  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

    SUnit *SUa = nullptr;
    // Search for two consecutive memory operations and link them
    // to prevent the scheduler from moving them apart.
    // In the DAG pre-process, SUnits are in the original order of
    // the instructions before scheduling.
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MI2 = *SU.getInstr();
      if (!MI2.mayLoad() && !MI2.mayStore()) {
        SUa = nullptr;
        continue;
      }
      if (!SUa) {
        SUa = &SU;
        continue;
      }

      MachineInstr &MI1 = *SUa->getInstr();
      if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
          (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
          (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
          (TII->isDS(MI1) && TII->isDS(MI2))) {
        SU.addPredBarrier(SUa);

        for (const SDep &SI : SU.Preds) {
          if (SI.getSUnit() != SUa)
            SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
        }

        if (&SU != &DAG->ExitSU) {
          for (const SDep &SI : SUa->Succs) {
            if (SI.getSUnit() != &SU)
              SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
          }
        }
      }

      SUa = &SU;
    }
  }
};
} // namespace

void GCNSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}

const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
  if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
    return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
  else
    return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<R600Subtarget>());
}

const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM,
                                            const Function &F) {
  if (TM.getTargetTriple().getArch() == Triple::amdgcn)
    return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
  else
    return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
}