LLVM  4.0.0
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
17 
18 #include "AMDGPU.h"
19 #include "R600InstrInfo.h"
20 #include "R600ISelLowering.h"
21 #include "R600FrameLowering.h"
22 #include "SIInstrInfo.h"
23 #include "SIISelLowering.h"
24 #include "SIFrameLowering.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/ADT/Triple.h"
32 #include <cassert>
33 #include <cstdint>
34 #include <memory>
35 #include <utility>
36 
37 #define GET_SUBTARGETINFO_HEADER
38 #include "AMDGPUGenSubtargetInfo.inc"
39 
40 namespace llvm {
41 
42 class StringRef;
43 
45 public:
46  enum Generation {
47  R600 = 0,
54  };
55 
56  enum {
67  };
68 
69 protected:
70  // Basic subtarget description.
73  unsigned IsaVersion;
74  unsigned WavefrontSize;
78 
79  // Possibly statically set by tablegen, but may want to be overridden.
80  bool FastFMAF32;
82 
83  // Dynamically set bits that enable features.
95 
96  // Used as options.
102  bool DumpCode;
103 
104  // Static subtarget properties set by tablegen.
105  bool FP64;
106  bool IsGCN;
109  bool CIInsts;
113  bool HasMovrel;
119  bool CaymanISA;
120  bool CFALUBug;
124 
125  // Dummy feature to use for assembler in tablegen.
127 
130 
131 public:
132  AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
133  const TargetMachine &TM);
134  ~AMDGPUSubtarget() override;
135 
137  StringRef GPU, StringRef FS);
138 
/// Pure virtual accessors for the per-target components. Both concrete
/// subtargets in this file (R600Subtarget, SISubtarget) override all four and
/// return pointers to their own R600*/SI* objects.
139  const AMDGPUInstrInfo *getInstrInfo() const override = 0;
140  const AMDGPUFrameLowering *getFrameLowering() const override = 0;
141  const AMDGPUTargetLowering *getTargetLowering() const override = 0;
142  const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
143 
/// \returns Pointer to this subtarget's InstrItins member.
144  const InstrItineraryData *getInstrItineraryData() const override {
145  return &InstrItins;
146  }
147 
148  // Nothing implemented, just prevent crashes on use.
     // The returned pointer is the address of the TSInfo member.
149  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
150  return &TSInfo;
151  }
152 
154 
/// \returns True when the OS component of TargetTriple is Triple::AMDHSA.
155  bool isAmdHsaOS() const {
156  return TargetTriple.getOS() == Triple::AMDHSA;
157  }
158 
/// \returns True when the OS component of TargetTriple is Triple::Mesa3D.
159  bool isMesa3DOS() const {
160  return TargetTriple.getOS() == Triple::Mesa3D;
161  }
162 
163  bool isOpenCLEnv() const {
165  }
166 
168  return Gen;
169  }
170 
/// \returns The WavefrontSize field of the basic subtarget description.
171  unsigned getWavefrontSize() const {
172  return WavefrontSize;
173  }
174 
/// \returns The LocalMemorySize member, as a signed int.
175  int getLocalMemorySize() const {
176  return LocalMemorySize;
177  }
178 
/// \returns The LDSBankCount member, as a signed int.
179  int getLDSBankCount() const {
180  return LDSBankCount;
181  }
182 
/// \returns The MaxPrivateElementSize member.
183  unsigned getMaxPrivateElementSize() const {
184  return MaxPrivateElementSize;
185  }
186 
/// \returns The Has16BitInsts member.
187  bool has16BitInsts() const {
188  return Has16BitInsts;
189  }
190 
/// \returns The FP64 flag, one of the static properties set by tablegen.
191  bool hasHWFP64() const {
192  return FP64;
193  }
194 
/// \returns The FastFMAF32 flag. Per the comment on its declaration, the flag
/// may be set statically by tablegen but can also be overridden.
195  bool hasFastFMAF32() const {
196  return FastFMAF32;
197  }
198 
/// \returns The HalfRate64Ops member.
199  bool hasHalfRate64Ops() const {
200  return HalfRate64Ops;
201  }
202 
/// \returns True for every generation that compares less than
/// VOLCANIC_ISLANDS in the Generation enum.
203  bool hasAddr64() const {
204  return (getGeneration() < VOLCANIC_ISLANDS);
205  }
206 
/// \returns True when getGeneration() is EVERGREEN or compares greater.
207  bool hasBFE() const {
208  return (getGeneration() >= EVERGREEN);
209  }
210 
/// \returns True when getGeneration() is EVERGREEN or compares greater.
211  bool hasBFI() const {
212  return (getGeneration() >= EVERGREEN);
213  }
214 
/// \returns The same answer as hasBFE(); the check is delegated to it.
215  bool hasBFM() const {
216  return hasBFE();
217  }
218 
/// Generation check keyed on \p Size (presumably the operand width in bits --
/// TODO confirm against callers).
/// \returns For Size == 32, whether getGeneration() >= EVERGREEN; for
/// Size == 64, whether getGeneration() >= SOUTHERN_ISLANDS; false for any
/// other size.
219  bool hasBCNT(unsigned Size) const {
220  if (Size == 32)
221  return (getGeneration() >= EVERGREEN);
222 
223  if (Size == 64)
224  return (getGeneration() >= SOUTHERN_ISLANDS);
225 
226  return false;
227  }
228 
/// \returns True when getGeneration() is EVERGREEN or compares greater.
229  bool hasMulU24() const {
230  return (getGeneration() >= EVERGREEN);
231  }
232 
/// \returns True when getGeneration() >= SOUTHERN_ISLANDS, or when
/// hasCaymanISA() reports the CaymanISA flag as set.
233  bool hasMulI24() const {
234  return (getGeneration() >= SOUTHERN_ISLANDS ||
235  hasCaymanISA());
236  }
237 
/// \returns True when getGeneration() is EVERGREEN or compares greater.
238  bool hasFFBL() const {
239  return (getGeneration() >= EVERGREEN);
240  }
241 
/// \returns True when getGeneration() is EVERGREEN or compares greater.
242  bool hasFFBH() const {
243  return (getGeneration() >= EVERGREEN);
244  }
245 
/// \returns True when getGeneration() is EVERGREEN or compares greater.
246  bool hasCARRY() const {
247  return (getGeneration() >= EVERGREEN);
248  }
249 
/// \returns True when getGeneration() is EVERGREEN or compares greater.
250  bool hasBORROW() const {
251  return (getGeneration() >= EVERGREEN);
252  }
253 
/// \returns The CaymanISA flag, one of the static properties set by tablegen.
254  bool hasCaymanISA() const {
255  return CaymanISA;
256  }
257 
/// \returns The EnablePromoteAlloca member.
258  bool isPromoteAllocaEnabled() const {
259  return EnablePromoteAlloca;
260  }
261 
264  }
265 
/// \returns The DumpCode flag, declared among the members used as options.
266  bool dumpCode() const {
267  return DumpCode;
268  }
269 
270  bool enableIEEEBit(const MachineFunction &MF) const {
272  }
273 
274  /// Return the amount of LDS that can be used that will not restrict the
275  /// occupancy lower than WaveCount.
276  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const;
277 
278  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wave count
279  /// if the given LDS memory size is the only constraint.
280  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
281 
/// \returns The FP16Denormals member.
282  bool hasFP16Denormals() const {
283  return FP16Denormals;
284  }
285 
/// \returns The FP32Denormals member.
286  bool hasFP32Denormals() const {
287  return FP32Denormals;
288  }
289 
/// \returns The FP64Denormals member.
290  bool hasFP64Denormals() const {
291  return FP64Denormals;
292  }
293 
/// \returns The FPExceptions member.
294  bool hasFPExceptions() const {
295  return FPExceptions;
296  }
297 
/// \returns The FlatForGlobal member.
298  bool useFlatForGlobal() const {
299  return FlatForGlobal;
300  }
301 
303  return UnalignedBufferAccess;
304  }
305 
307  return UnalignedScratchAccess;
308  }
309 
/// \returns The EnableXNACK member.
310  bool isXNACKEnabled() const {
311  return EnableXNACK;
312  }
313 
314  bool isMesaKernel(const MachineFunction &MF) const {
316  }
317 
318  // Covers VS/PS/CS graphics shaders
319  bool isMesaGfxShader(const MachineFunction &MF) const {
321  }
322 
/// \returns True if the OS is AMDHSA (isAmdHsaOS) or \p MF is a Mesa kernel
/// (isMesaKernel). The OS check is evaluated first.
323  bool isAmdCodeObjectV2(const MachineFunction &MF) const {
324  return isAmdHsaOS() || isMesaKernel(MF);
325  }
326 
327  /// \brief Returns the offset in bytes from the start of the input buffer
328  /// of the first explicit kernel argument.
     ///
     /// The offset is 0 when isAmdCodeObjectV2(\p MF) holds and 36 otherwise.
329  unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
330  return isAmdCodeObjectV2(MF) ? 0 : 36;
331  }
332 
/// \returns 8 when the OS is AMDHSA, 4 otherwise.
333  unsigned getAlignmentForImplicitArgPtr() const {
334  return isAmdHsaOS() ? 8 : 4;
335  }
336 
/// \returns 16 when \p MF is a Mesa kernel; otherwise 32 when the OS is AMDHSA
/// and the environment is OpenCL; otherwise 0. The Mesa-kernel check is made
/// first, so it wins if both conditions hold.
337  unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
338  if (isMesaKernel(MF))
339  return 16;
340  if (isAmdHsaOS() && isOpenCLEnv())
341  return 32;
342  return 0;
343  }
344 
/// \returns 4 * 256 / getWavefrontSize(), using unsigned integer division
/// (16 for a wavefront size of 64). The wavefront size must be non-zero.
345  unsigned getStackAlignment() const {
346  // Scratch is allocated in 256 dword per wave blocks.
347  return 4 * 256 / getWavefrontSize();
348  }
349 
/// \returns Always true; this override does not depend on subtarget state.
350  bool enableMachineScheduler() const override {
351  return true;
352  }
353 
/// \returns Always true; this override does not depend on subtarget state.
354  bool enableSubRegLiveness() const override {
355  return true;
356  }
357 
358  /// \returns Number of execution units per compute unit supported by the
359  /// subtarget.
     /// The body returns the constant 4 regardless of generation.
360  unsigned getEUsPerCU() const {
361  return 4;
362  }
363 
364  /// \returns Maximum number of work groups per compute unit supported by the
365  /// subtarget and limited by given flat work group size.
366  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
368  return 8;
369  return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16;
370  }
371 
372  /// \returns Maximum number of waves per compute unit supported by the
373  /// subtarget without any kind of limitation.
     /// Computed as getMaxWavesPerEU() multiplied by getEUsPerCU().
374  unsigned getMaxWavesPerCU() const {
375  return getMaxWavesPerEU() * getEUsPerCU();
376  }
377 
378  /// \returns Maximum number of waves per compute unit supported by the
379  /// subtarget and limited by given flat work group size.
     /// NOTE(review): the body returns getWavesPerWorkGroup(FlatWorkGroupSize)
     /// only, i.e. the wave count of a single work group of that size; it is
     /// not scaled by a work-group count per CU. Confirm this is the intended
     /// limit.
380  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
381  return getWavesPerWorkGroup(FlatWorkGroupSize);
382  }
383 
384  /// \returns Minimum number of waves per execution unit supported by the
385  /// subtarget.
     /// The body returns the constant 1.
386  unsigned getMinWavesPerEU() const {
387  return 1;
388  }
389 
390  /// \returns Maximum number of waves per execution unit supported by the
391  /// subtarget without any kind of limitation.
392  unsigned getMaxWavesPerEU() const {
394  return 8;
395  // FIXME: Need to take scratch memory into account.
396  return 10;
397  }
398 
399  /// \returns Maximum number of waves per execution unit supported by the
400  /// subtarget and limited by given flat work group size.
     /// Computed as getMaxWavesPerCU(FlatWorkGroupSize) divided by
     /// getEUsPerCU(), rounded up: alignTo first pads the dividend to a
     /// multiple of the EU count, so the division is exact.
401  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
402  return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) /
403  getEUsPerCU();
404  }
405 
406  /// \returns Minimum flat work group size supported by the subtarget.
     /// The body returns the constant 1.
407  unsigned getMinFlatWorkGroupSize() const {
408  return 1;
409  }
410 
411  /// \returns Maximum flat work group size supported by the subtarget.
     /// The body returns the constant 2048.
412  unsigned getMaxFlatWorkGroupSize() const {
413  return 2048;
414  }
415 
416  /// \returns Number of waves per work group given the flat work group size.
     /// This is \p FlatWorkGroupSize divided by getWavefrontSize(), rounded up
     /// to a whole wave. The wavefront size must be non-zero.
417  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
418  return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize();
419  }
420 
423 
424  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
425  /// for function \p F, or minimum/maximum flat work group sizes explicitly
426  /// requested using "amdgpu-flat-work-group-size" attribute attached to
427  /// function \p F.
428  ///
429  /// \returns Subtarget's default values if explicitly requested values cannot
430  /// be converted to integer, or violate subtarget's specifications.
431  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
432 
433  /// \returns Subtarget's default pair of minimum/maximum number of waves per
434  /// execution unit for function \p F, or minimum/maximum number of waves per
435  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
436  /// attached to function \p F.
437  ///
438  /// \returns Subtarget's default values if explicitly requested values cannot
439  /// be converted to integer, violate subtarget's specifications, or are not
440  /// compatible with minimum/maximum number of waves limited by flat work group
441  /// size, register usage, and/or lds usage.
442  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
443 };
444 
/// R600 subtarget. Holds its instruction info, frame lowering and target
/// lowering objects by value and returns pointers to them from the accessors
/// that AMDGPUSubtarget declares as pure virtual. The class is final.
445 class R600Subtarget final : public AMDGPUSubtarget {
446 private:
     // Per-target components owned directly by the subtarget object.
447  R600InstrInfo InstrInfo;
448  R600FrameLowering FrameLowering;
449  R600TargetLowering TLInfo;
450 
451 public:
452  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
453  const TargetMachine &TM);
454 
     /// \returns Pointer to the InstrInfo member.
455  const R600InstrInfo *getInstrInfo() const override {
456  return &InstrInfo;
457  }
458 
     /// \returns Pointer to the FrameLowering member.
459  const R600FrameLowering *getFrameLowering() const override {
460  return &FrameLowering;
461  }
462 
     /// \returns Pointer to the TLInfo member.
463  const R600TargetLowering *getTargetLowering() const override {
464  return &TLInfo;
465  }
466 
     /// \returns Address of the register info object obtained from InstrInfo;
     /// the subtarget does not store one of its own.
467  const R600RegisterInfo *getRegisterInfo() const override {
468  return &InstrInfo.getRegisterInfo();
469  }
470 
     /// \returns The CFALUBug flag inherited from AMDGPUSubtarget.
471  bool hasCFAluBug() const {
472  return CFALUBug;
473  }
474 
     /// \returns The HasVertexCache member.
475  bool hasVertexCache() const {
476  return HasVertexCache;
477  }
478 
     /// \returns The TexVTXClauseSize member, as a short.
479  short getTexVTXClauseSize() const {
480  return TexVTXClauseSize;
481  }
482 };
483 
484 class SISubtarget final : public AMDGPUSubtarget {
485 public:
486  enum {
487  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
488  // doesn't spill SGPRs as much as when 80 is set.
490  };
491 
492 private:
493  SIInstrInfo InstrInfo;
494  SIFrameLowering FrameLowering;
495  SITargetLowering TLInfo;
496  std::unique_ptr<GISelAccessor> GISel;
497 
498 public:
499  SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
500  const TargetMachine &TM);
501 
/// \returns Pointer to the SIInstrInfo member held by value in this subtarget.
502  const SIInstrInfo *getInstrInfo() const override {
503  return &InstrInfo;
504  }
505 
/// \returns Pointer to the SIFrameLowering member held by value in this
/// subtarget.
506  const SIFrameLowering *getFrameLowering() const override {
507  return &FrameLowering;
508  }
509 
/// \returns Pointer to the SITargetLowering member (TLInfo) held by value in
/// this subtarget.
510  const SITargetLowering *getTargetLowering() const override {
511  return &TLInfo;
512  }
513 
/// Forwards to the installed GISelAccessor.
/// The assert requires the GISel pointer to be non-null before it is
/// dereferenced; when assertions are compiled out there is no such guard.
/// \returns Whatever GISel->getCallLowering() returns.
514  const CallLowering *getCallLowering() const override {
515  assert(GISel && "Access to GlobalISel APIs not set");
516  return GISel->getCallLowering();
517  }
518 
/// \returns Address of the register info object obtained from InstrInfo; the
/// subtarget does not store one of its own.
519  const SIRegisterInfo *getRegisterInfo() const override {
520  return &InstrInfo.getRegisterInfo();
521  }
522 
524  this->GISel.reset(&GISel);
525  }
526 
528  unsigned NumRegionInstrs) const override;
529 
530  bool isVGPRSpillingEnabled(const Function& F) const;
531 
/// \returns The constant 16.
532  unsigned getMaxNumUserSGPRs() const {
533  return 16;
534  }
535 
/// \returns The FlatAddressSpace member.
536  bool hasFlatAddressSpace() const {
537  return FlatAddressSpace;
538  }
539 
/// \returns The HasSMemRealTime member.
540  bool hasSMemRealTime() const {
541  return HasSMemRealTime;
542  }
543 
/// \returns The HasMovrel flag inherited from AMDGPUSubtarget.
544  bool hasMovrel() const {
545  return HasMovrel;
546  }
547 
/// \returns The HasVGPRIndexMode member.
548  bool hasVGPRIndexMode() const {
549  return HasVGPRIndexMode;
550  }
551 
/// \returns True when getGeneration() is VOLCANIC_ISLANDS or compares greater.
552  bool hasScalarCompareEq64() const {
553  return getGeneration() >= VOLCANIC_ISLANDS;
554  }
555 
/// \returns The HasScalarStores member.
556  bool hasScalarStores() const {
557  return HasScalarStores;
558  }
559 
/// \returns The HasInv2PiInlineImm member.
560  bool hasInv2PiInlineImm() const {
561  return HasInv2PiInlineImm;
562  }
563 
/// \returns The EnableSIScheduler member.
564  bool enableSIScheduler() const {
565  return EnableSIScheduler;
566  }
567 
568  bool debuggerSupported() const {
569  return debuggerInsertNops() && debuggerReserveRegs() &&
571  }
572 
/// \returns The DebuggerInsertNops member; also consulted by
/// debuggerSupported().
573  bool debuggerInsertNops() const {
574  return DebuggerInsertNops;
575  }
576 
/// \returns The DebuggerReserveRegs member; also consulted by
/// debuggerSupported().
577  bool debuggerReserveRegs() const {
578  return DebuggerReserveRegs;
579  }
580 
/// \returns The DebuggerEmitPrologue member.
581  bool debuggerEmitPrologue() const {
582  return DebuggerEmitPrologue;
583  }
584 
/// \returns The EnableLoadStoreOpt member.
585  bool loadStoreOptEnabled() const {
586  return EnableLoadStoreOpt;
587  }
588 
/// \returns The SGPRInitBug member.
589  bool hasSGPRInitBug() const {
590  return SGPRInitBug;
591  }
592 
593  bool has12DWordStoreHazard() const {
595  }
596 
597  unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
598 
599  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
600  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
601 
602  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
603  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
604 
605  /// \returns True if waitcnt instruction is needed before barrier instruction,
606  /// false otherwise.
608  return true;
609  }
610 
611  unsigned getMaxNumSGPRs() const;
612 };
613 
614 } // end namespace llvm
615 
616 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:279
bool hasCaymanISA() const
const AMDGPURegisterInfo * getRegisterInfo() const override=0
The goal of this helper class is to gather the accessor to all the APIs related to GlobalISel...
Definition: GISelAccessor.h:29
bool hasVertexCache() const
Interface definition for R600InstrInfo.
bool isVGPRSpillingEnabled(const Function &F) const
bool hasUnalignedBufferAccess() const
int getLDSBankCount() const
bool hasFastFMAF32() const
AMDGPUSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
const SIInstrInfo * getInstrInfo() const override
bool enableIEEEBit(const MachineFunction &MF) const
SelectionDAGTargetInfo TSInfo
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:664
int getLocalMemorySize() const
bool loadStoreOptEnabled() const
bool hasFPExceptions() const
bool isAmdCodeObjectV2(const MachineFunction &MF) const
unsigned getImplicitArgNumBytes(const MachineFunction &MF) const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:165
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const
bool isMesaKernel(const MachineFunction &MF) const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool enableSIScheduler() const
bool useFlatForGlobal() const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
const InstrItineraryData * getInstrItineraryData() const override
bool getScalarizeGlobalBehavior() const
bool isMesaGfxShader(const MachineFunction &MF) const
bool hasCFAluBug() const
unsigned getMaxNumUserSGPRs() const
#define F(x, y, z)
Definition: MD5.cpp:51
bool isXNACKEnabled() const
bool hasVGPRIndexMode() const
SISubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM)
const AMDGPUFrameLowering * getFrameLowering() const override=0
Itinerary data supplied by a subtarget to be used by a target.
Generation getGeneration() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:117
bool hasHalfRate64Ops() const
void ParseSubtargetFeatures(StringRef CPU, StringRef FS)
bool hasSGPRInitBug() const
const R600FrameLowering * getFrameLowering() const override
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
unsigned getMaxWavesPerEU() const
bool isShader(CallingConv::ID cc)
unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const
unsigned getMaxWavesPerCU() const
bool isOpenCLEnv() const
R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM)
bool hasUnalignedScratchAccess() const
unsigned getMinFlatWorkGroupSize() const
SI DAG Lowering interface definition.
bool isCompute(CallingConv::ID cc)
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
const R600InstrInfo * getInstrInfo() const override
bool hasScalarStores() const
bool hasFP32Denormals() const
bool debuggerReserveRegs() const
bool debuggerSupported() const
bool hasMovrel() const
unsigned getMaxFlatWorkGroupSize() const
bool has16BitInsts() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
const AMDGPUInstrInfo * getInstrInfo() const override=0
bool hasScalarCompareEq64() const
bool hasInv2PiInlineImm() const
bool isPromoteAllocaEnabled() const
bool hasFP64Denormals() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const
Inverse of getMaxLocalMemWithWaveCount.
const R600RegisterInfo * getRegisterInfo() const override
const AMDGPUTargetLowering * getTargetLowering() const override=0
unsigned getMinWavesPerEU() const
const SIRegisterInfo * getRegisterInfo() const override
bool debuggerInsertNops() const
void setScalarizeGlobalBehavior(bool b)
bool isAmdHsaOS() const
void setGISelAccessor(GISelAccessor &GISel)
short getTexVTXClauseSize() const
bool enableSubRegLiveness() const override
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Interface definition for SIInstrInfo.
bool unsafeDSOffsetFoldingEnabled() const
unsigned getMaxPrivateElementSize() const
R600 DAG Lowering interface definition.
Information about the stack frame layout on the AMDGPU targets.
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:69
const CallLowering * getCallLowering() const override
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool isMesa3DOS() const
const SIFrameLowering * getFrameLowering() const override
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
~AMDGPUSubtarget() override
unsigned getWavefrontSize() const
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM)
const SITargetLowering * getTargetLowering() const override
unsigned getEUsPerCU() const
EnvironmentType getEnvironment() const
getEnvironment - Get the parsed environment type of this triple.
Definition: Triple.h:288
unsigned getStackAlignment() const
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasSMemRealTime() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool needWaitcntBeforeBarrier() const
InstrItineraryData InstrItins
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
bool hasBCNT(unsigned Size) const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool hasFP16Denormals() const
bool enableMachineScheduler() const override
unsigned getMaxNumSGPRs() const
bool debuggerEmitPrologue() const
bool hasFlatAddressSpace() const
bool has12DWordStoreHazard() const
unsigned getAlignmentForImplicitArgPtr() const