LLVM  8.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUCallLowering.h"
20 #include "R600FrameLowering.h"
21 #include "R600ISelLowering.h"
22 #include "R600InstrInfo.h"
23 #include "SIFrameLowering.h"
24 #include "SIISelLowering.h"
25 #include "SIInstrInfo.h"
26 #include "Utils/AMDGPUBaseInfo.h"
27 #include "llvm/ADT/Triple.h"
35 #include <cassert>
36 #include <cstdint>
37 #include <memory>
38 #include <utility>
39 
40 #define GET_SUBTARGETINFO_HEADER
41 #include "AMDGPUGenSubtargetInfo.inc"
42 #define GET_SUBTARGETINFO_HEADER
43 #include "R600GenSubtargetInfo.inc"
44 
45 namespace llvm {
46 
47 class StringRef;
48 
50 public:
51  enum Generation {
52  R600 = 0,
53  R700 = 1,
54  EVERGREEN = 2,
59  GFX9 = 7
60  };
61 
62 private:
63  Triple TargetTriple;
64 
65 protected:
70  bool HasSDWA;
72  bool HasMulI24;
73  bool HasMulU24;
79  unsigned WavefrontSize;
80 
81 public:
82  AMDGPUSubtarget(const Triple &TT);
83 
84  static const AMDGPUSubtarget &get(const MachineFunction &MF);
85  static const AMDGPUSubtarget &get(const TargetMachine &TM,
86  const Function &F);
87 
88  /// \returns Default range flat work group size for a calling convention.
89  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
90 
91  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
92  /// for function \p F, or minimum/maximum flat work group sizes explicitly
93  /// requested using "amdgpu-flat-work-group-size" attribute attached to
94  /// function \p F.
95  ///
96  /// \returns Subtarget's default values if explicitly requested values cannot
97  /// be converted to integer, or violate subtarget's specifications.
98  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
99 
100  /// \returns Subtarget's default pair of minimum/maximum number of waves per
101  /// execution unit for function \p F, or minimum/maximum number of waves per
102  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
103  /// attached to function \p F.
104  ///
105  /// \returns Subtarget's default values if explicitly requested values cannot
106  /// be converted to integer, violate subtarget's specifications, or are not
107  /// compatible with minimum/maximum number of waves limited by flat work group
108  /// size, register usage, and/or lds usage.
109  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
110 
111  /// Return the amount of LDS that can be used that will not restrict the
112  /// occupancy lower than WaveCount.
113  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
114  const Function &) const;
115 
116 /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
117  /// the given LDS memory size is the only constraint.
118  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
119 
120  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
121 
122  bool isAmdHsaOS() const {
123  return TargetTriple.getOS() == Triple::AMDHSA;
124  }
125 
126  bool isAmdPalOS() const {
127  return TargetTriple.getOS() == Triple::AMDPAL;
128  }
129 
130  bool isMesa3DOS() const {
131  return TargetTriple.getOS() == Triple::Mesa3D;
132  }
133 
134  bool isMesaKernel(const Function &F) const {
135  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
136  }
137 
138  bool isAmdHsaOrMesa(const Function &F) const {
139  return isAmdHsaOS() || isMesaKernel(F);
140  }
141 
142  bool has16BitInsts() const {
143  return Has16BitInsts;
144  }
145 
146  bool hasMadMixInsts() const {
147  return HasMadMixInsts;
148  }
149 
150  bool hasFP32Denormals() const {
151  return FP32Denormals;
152  }
153 
154  bool hasFPExceptions() const {
155  return FPExceptions;
156  }
157 
158  bool hasSDWA() const {
159  return HasSDWA;
160  }
161 
162  bool hasVOP3PInsts() const {
163  return HasVOP3PInsts;
164  }
165 
166  bool hasMulI24() const {
167  return HasMulI24;
168  }
169 
170  bool hasMulU24() const {
171  return HasMulU24;
172  }
173 
174  bool hasInv2PiInlineImm() const {
175  return HasInv2PiInlineImm;
176  }
177 
178  bool hasFminFmaxLegacy() const {
179  return HasFminFmaxLegacy;
180  }
181 
182  bool hasTrigReducedRange() const {
183  return HasTrigReducedRange;
184  }
185 
186  bool isPromoteAllocaEnabled() const {
187  return EnablePromoteAlloca;
188  }
189 
190  unsigned getWavefrontSize() const {
191  return WavefrontSize;
192  }
193 
194  int getLocalMemorySize() const {
195  return LocalMemorySize;
196  }
197 
198  unsigned getAlignmentForImplicitArgPtr() const {
199  return isAmdHsaOS() ? 8 : 4;
200  }
201 
202  /// Returns the offset in bytes from the start of the input buffer
203  /// of the first explicit kernel argument.
204  unsigned getExplicitKernelArgOffset(const Function &F) const {
205  return isAmdHsaOrMesa(F) ? 0 : 36;
206  }
207 
208  /// \returns Maximum number of work groups per compute unit supported by the
209  /// subtarget and limited by given \p FlatWorkGroupSize.
210  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
211 
212  /// \returns Minimum flat work group size supported by the subtarget.
213  virtual unsigned getMinFlatWorkGroupSize() const = 0;
214 
215  /// \returns Maximum flat work group size supported by the subtarget.
216  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
217 
218  /// \returns Maximum number of waves per execution unit supported by the
219  /// subtarget and limited by given \p FlatWorkGroupSize.
220  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
221 
222  /// \returns Minimum number of waves per execution unit supported by the
223  /// subtarget.
224  virtual unsigned getMinWavesPerEU() const = 0;
225 
226  unsigned getMaxWavesPerEU() const { return 10; }
227 
228 /// Creates value range metadata on a workitemid.* intrinsic call or load.
229  bool makeLIDRangeMetadata(Instruction *I) const;
230 
231  /// \returns Number of bytes of arguments that are passed to a shader or
232  /// kernel in addition to the explicit ones declared for the function.
233  unsigned getImplicitArgNumBytes(const Function &F) const {
234  if (isMesaKernel(F))
235  return 16;
236  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
237  }
238  uint64_t getExplicitKernArgSize(const Function &F,
239  unsigned &MaxAlign) const;
240  unsigned getKernArgSegmentSize(const Function &F,
241  unsigned &MaxAlign) const;
242 
243  virtual ~AMDGPUSubtarget() {}
244 };
245 
247  public AMDGPUSubtarget {
248 public:
249  enum {
266  };
267 
269  TrapHandlerAbiNone = 0,
270  TrapHandlerAbiHsa = 1
271  };
272 
273  enum TrapID {
274  TrapIDHardwareReserved = 0,
275  TrapIDHSADebugTrap = 1,
276  TrapIDLLVMTrap = 2,
277  TrapIDLLVMDebugTrap = 3,
278  TrapIDDebugBreakpoint = 7,
279  TrapIDDebugReserved8 = 8,
280  TrapIDDebugReservedFE = 0xfe,
281  TrapIDDebugReservedFF = 0xff
282  };
283 
285  LLVMTrapHandlerRegValue = 1
286  };
287 
288 private:
289  /// GlobalISel related APIs.
290  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
291  std::unique_ptr<InstructionSelector> InstSelector;
292  std::unique_ptr<LegalizerInfo> Legalizer;
293  std::unique_ptr<RegisterBankInfo> RegBankInfo;
294 
295 protected:
296  // Basic subtarget description.
298  unsigned Gen;
299  unsigned IsaVersion;
303 
304  // Possibly statically set by tablegen, but may want to be overridden.
307 
308 // Dynamically set bits that enable features.
310  bool DX10Clamp;
321 
322  // Used as options.
329  bool DumpCode;
330 
331 // Subtarget properties statically set by tablegen
332  bool FP64;
333  bool FMA;
334  bool MIMG_R128;
335  bool IsGCN;
337  bool CIInsts;
338  bool VIInsts;
339  bool GFX9Insts;
344  bool HasMovrel;
353  bool HasDPP;
364  bool CaymanISA;
365  bool CFALUBug;
369 
370  // Dummy feature to use for assembler in tablegen.
372 
374 private:
375  SIInstrInfo InstrInfo;
376  SITargetLowering TLInfo;
377  SIFrameLowering FrameLowering;
378 
379 public:
380  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
381  const GCNTargetMachine &TM);
382  ~GCNSubtarget() override;
383 
384  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
385  StringRef GPU, StringRef FS);
386 
387  const SIInstrInfo *getInstrInfo() const override {
388  return &InstrInfo;
389  }
390 
391  const SIFrameLowering *getFrameLowering() const override {
392  return &FrameLowering;
393  }
394 
395  const SITargetLowering *getTargetLowering() const override {
396  return &TLInfo;
397  }
398 
399  const SIRegisterInfo *getRegisterInfo() const override {
400  return &InstrInfo.getRegisterInfo();
401  }
402 
403  const CallLowering *getCallLowering() const override {
404  return CallLoweringInfo.get();
405  }
406 
407  const InstructionSelector *getInstructionSelector() const override {
408  return InstSelector.get();
409  }
410 
411  const LegalizerInfo *getLegalizerInfo() const override {
412  return Legalizer.get();
413  }
414 
415  const RegisterBankInfo *getRegBankInfo() const override {
416  return RegBankInfo.get();
417  }
418 
419  // Nothing implemented, just prevent crashes on use.
420  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
421  return &TSInfo;
422  }
423 
424  const InstrItineraryData *getInstrItineraryData() const override {
425  return &InstrItins;
426  }
427 
428  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
429 
431  return (Generation)Gen;
432  }
433 
434  unsigned getWavefrontSizeLog2() const {
435  return Log2_32(WavefrontSize);
436  }
437 
438  int getLDSBankCount() const {
439  return LDSBankCount;
440  }
441 
442  unsigned getMaxPrivateElementSize() const {
443  return MaxPrivateElementSize;
444  }
445 
446  bool hasIntClamp() const {
447  return HasIntClamp;
448  }
449 
450  bool hasFP64() const {
451  return FP64;
452  }
453 
454  bool hasMIMG_R128() const {
455  return MIMG_R128;
456  }
457 
458  bool hasHWFP64() const {
459  return FP64;
460  }
461 
462  bool hasFastFMAF32() const {
463  return FastFMAF32;
464  }
465 
466  bool hasHalfRate64Ops() const {
467  return HalfRate64Ops;
468  }
469 
470  bool hasAddr64() const {
471  return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
472  }
473 
474  bool hasBFE() const {
475  return true;
476  }
477 
478  bool hasBFI() const {
479  return true;
480  }
481 
482  bool hasBFM() const {
483  return hasBFE();
484  }
485 
486  bool hasBCNT(unsigned Size) const {
487  return true;
488  }
489 
490  bool hasFFBL() const {
491  return true;
492  }
493 
494  bool hasFFBH() const {
495  return true;
496  }
497 
498  bool hasMed3_16() const {
499  return getGeneration() >= AMDGPUSubtarget::GFX9;
500  }
501 
502  bool hasMin3Max3_16() const {
503  return getGeneration() >= AMDGPUSubtarget::GFX9;
504  }
505 
506  bool hasFmaMixInsts() const {
507  return HasFmaMixInsts;
508  }
509 
510  bool hasCARRY() const {
511  return true;
512  }
513 
514  bool hasFMA() const {
515  return FMA;
516  }
517 
519  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
520  }
521 
522  bool enableHugePrivateBuffer() const {
523  return EnableHugePrivateBuffer;
524  }
525 
527  return EnableUnsafeDSOffsetFolding;
528  }
529 
530  bool dumpCode() const {
531  return DumpCode;
532  }
533 
534  /// Return the amount of LDS that can be used that will not restrict the
535  /// occupancy lower than WaveCount.
536  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
537  const Function &) const;
538 
539  bool hasFP16Denormals() const {
540  return FP64FP16Denormals;
541  }
542 
543  bool hasFP64Denormals() const {
544  return FP64FP16Denormals;
545  }
546 
548  return getGeneration() >= AMDGPUSubtarget::GFX9;
549  }
550 
551  bool enableDX10Clamp() const {
552  return DX10Clamp;
553  }
554 
555  bool enableIEEEBit(const MachineFunction &MF) const {
557  }
558 
559  bool useFlatForGlobal() const {
560  return FlatForGlobal;
561  }
562 
563  /// \returns If target supports ds_read/write_b128 and user enables generation
564  /// of ds_read/write_b128.
565  bool useDS128() const {
566  return CIInsts && EnableDS128;
567  }
568 
569  /// \returns If MUBUF instructions always perform range checking, even for
570  /// buffer resources used for private memory access.
572  return getGeneration() < AMDGPUSubtarget::GFX9;
573  }
574 
576  return AutoWaitcntBeforeBarrier;
577  }
578 
579  bool hasCodeObjectV3() const {
580  return CodeObjectV3;
581  }
582 
584  return UnalignedBufferAccess;
585  }
586 
588  return UnalignedScratchAccess;
589  }
590 
591  bool hasApertureRegs() const {
592  return HasApertureRegs;
593  }
594 
595  bool isTrapHandlerEnabled() const {
596  return TrapHandler;
597  }
598 
599  bool isXNACKEnabled() const {
600  return EnableXNACK;
601  }
602 
603  bool hasFlatAddressSpace() const {
604  return FlatAddressSpace;
605  }
606 
607  bool hasFlatInstOffsets() const {
608  return FlatInstOffsets;
609  }
610 
611  bool hasFlatGlobalInsts() const {
612  return FlatGlobalInsts;
613  }
614 
615  bool hasFlatScratchInsts() const {
616  return FlatScratchInsts;
617  }
618 
620  return getGeneration() > GFX9;
621  }
622 
623  bool hasD16LoadStore() const {
624  return getGeneration() >= GFX9;
625  }
626 
627 /// Return true if most LDS instructions have an m0 use that requires m0 to
628 /// be initialized.
629  bool ldsRequiresM0Init() const {
630  return getGeneration() < GFX9;
631  }
632 
633  bool hasAddNoCarry() const {
634  return AddNoCarryInsts;
635  }
636 
637  bool hasUnpackedD16VMem() const {
638  return HasUnpackedD16VMem;
639  }
640 
641  // Covers VS/PS/CS graphics shaders
642  bool isMesaGfxShader(const Function &F) const {
643  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
644  }
645 
646  bool hasMad64_32() const {
647  return getGeneration() >= SEA_ISLANDS;
648  }
649 
650  bool hasSDWAOmod() const {
651  return HasSDWAOmod;
652  }
653 
654  bool hasSDWAScalar() const {
655  return HasSDWAScalar;
656  }
657 
658  bool hasSDWASdst() const {
659  return HasSDWASdst;
660  }
661 
662  bool hasSDWAMac() const {
663  return HasSDWAMac;
664  }
665 
666  bool hasSDWAOutModsVOPC() const {
667  return HasSDWAOutModsVOPC;
668  }
669 
671  return getGeneration() < SEA_ISLANDS;
672  }
673 
674  bool hasDLInsts() const {
675  return HasDLInsts;
676  }
677 
678  bool d16PreservesUnusedBits() const {
679  return D16PreservesUnusedBits;
680  }
681 
682  // Scratch is allocated in 256 dword per wave blocks for the entire
683 // wavefront. When viewed from the perspective of an arbitrary workitem, this
684  // is 4-byte aligned.
685  //
686  // Only 4-byte alignment is really needed to access anything. Transformations
687  // on the pointer value itself may rely on the alignment / known low bits of
688  // the pointer. Set this to something above the minimum to avoid needing
689  // dynamic realignment in common cases.
690  unsigned getStackAlignment() const {
691  return 16;
692  }
693 
694  bool enableMachineScheduler() const override {
695  return true;
696  }
697 
698  bool enableSubRegLiveness() const override {
699  return true;
700  }
701 
702  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
704 
705  /// \returns Number of execution units per compute unit supported by the
706  /// subtarget.
707  unsigned getEUsPerCU() const {
708  return AMDGPU::IsaInfo::getEUsPerCU(this);
709  }
710 
711  /// \returns Maximum number of waves per compute unit supported by the
712  /// subtarget without any kind of limitation.
713  unsigned getMaxWavesPerCU() const {
715  }
716 
717  /// \returns Maximum number of waves per compute unit supported by the
718  /// subtarget and limited by given \p FlatWorkGroupSize.
719  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
720  return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
721  }
722 
723  /// \returns Maximum number of waves per execution unit supported by the
724  /// subtarget without any kind of limitation.
725  unsigned getMaxWavesPerEU() const {
727  }
728 
729  /// \returns Number of waves per work group supported by the subtarget and
730  /// limited by given \p FlatWorkGroupSize.
731  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
732  return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
733  }
734 
735  // static wrappers
736  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
737 
738  // XXX - Why is this here if it isn't in the default pass set?
739  bool enableEarlyIfConversion() const override {
740  return true;
741  }
742 
743  void overrideSchedPolicy(MachineSchedPolicy &Policy,
744  unsigned NumRegionInstrs) const override;
745 
746  bool isVGPRSpillingEnabled(const Function &F) const;
747 
748  unsigned getMaxNumUserSGPRs() const {
749  return 16;
750  }
751 
752  bool hasSMemRealTime() const {
753  return HasSMemRealTime;
754  }
755 
756  bool hasMovrel() const {
757  return HasMovrel;
758  }
759 
760  bool hasVGPRIndexMode() const {
761  return HasVGPRIndexMode;
762  }
763 
764  bool useVGPRIndexMode(bool UserEnable) const {
765  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
766  }
767 
768  bool hasScalarCompareEq64() const {
769  return getGeneration() >= VOLCANIC_ISLANDS;
770  }
771 
772  bool hasScalarStores() const {
773  return HasScalarStores;
774  }
775 
776  bool hasScalarAtomics() const {
777  return HasScalarAtomics;
778  }
779 
780 
781  bool hasDPP() const {
782  return HasDPP;
783  }
784 
785  bool hasR128A16() const {
786  return HasR128A16;
787  }
788 
789  bool enableSIScheduler() const {
790  return EnableSIScheduler;
791  }
792 
793  bool debuggerSupported() const {
794  return debuggerInsertNops() && debuggerEmitPrologue();
795  }
796 
797  bool debuggerInsertNops() const {
798  return DebuggerInsertNops;
799  }
800 
801  bool debuggerEmitPrologue() const {
802  return DebuggerEmitPrologue;
803  }
804 
805  bool loadStoreOptEnabled() const {
806  return EnableLoadStoreOpt;
807  }
808 
809  bool hasSGPRInitBug() const {
810  return SGPRInitBug;
811  }
812 
813  bool has12DWordStoreHazard() const {
814  return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
815  }
816 
817  bool hasSMovFedHazard() const {
818  return getGeneration() >= AMDGPUSubtarget::GFX9;
819  }
820 
822  return getGeneration() >= AMDGPUSubtarget::GFX9;
823  }
824 
825  bool hasReadM0SendMsgHazard() const {
826  return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
827  }
828 
829  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
830  /// SGPRs
831  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
832 
833  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
834  /// VGPRs
835  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
836 
837  /// \returns true if the flat_scratch register should be initialized with the
838  /// pointer to the wave's scratch memory rather than a size and offset.
839  bool flatScratchIsPointer() const {
840  return getGeneration() >= AMDGPUSubtarget::GFX9;
841  }
842 
843  /// \returns true if the machine has merged shaders in which s0-s7 are
844  /// reserved by the hardware and user SGPRs start at s8
845  bool hasMergedShaders() const {
846  return getGeneration() >= GFX9;
847  }
848 
849  /// \returns SGPR allocation granularity supported by the subtarget.
850  unsigned getSGPRAllocGranule() const {
852  }
853 
854  /// \returns SGPR encoding granularity supported by the subtarget.
855  unsigned getSGPREncodingGranule() const {
857  }
858 
859  /// \returns Total number of SGPRs supported by the subtarget.
860  unsigned getTotalNumSGPRs() const {
862  }
863 
864  /// \returns Addressable number of SGPRs supported by the subtarget.
865  unsigned getAddressableNumSGPRs() const {
867  }
868 
869  /// \returns Minimum number of SGPRs that meets the given number of waves per
870  /// execution unit requirement supported by the subtarget.
871  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
872  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
873  }
874 
875  /// \returns Maximum number of SGPRs that meets the given number of waves per
876  /// execution unit requirement supported by the subtarget.
877  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
878  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
879  }
880 
881  /// \returns Reserved number of SGPRs for given function \p MF.
882  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
883 
884  /// \returns Maximum number of SGPRs that meets number of waves per execution
885  /// unit requirement for function \p MF, or number of SGPRs explicitly
886  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
887  ///
888  /// \returns Value that meets number of waves per execution unit requirement
889  /// if explicitly requested value cannot be converted to integer, violates
890  /// subtarget's specifications, or does not meet number of waves per execution
891  /// unit requirement.
892  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
893 
894  /// \returns VGPR allocation granularity supported by the subtarget.
895  unsigned getVGPRAllocGranule() const {
897  }
898 
899  /// \returns VGPR encoding granularity supported by the subtarget.
900  unsigned getVGPREncodingGranule() const {
902  }
903 
904  /// \returns Total number of VGPRs supported by the subtarget.
905  unsigned getTotalNumVGPRs() const {
907  }
908 
909  /// \returns Addressable number of VGPRs supported by the subtarget.
910  unsigned getAddressableNumVGPRs() const {
912  }
913 
914  /// \returns Minimum number of VGPRs that meets given number of waves per
915  /// execution unit requirement supported by the subtarget.
916  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
917  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
918  }
919 
920  /// \returns Maximum number of VGPRs that meets given number of waves per
921  /// execution unit requirement supported by the subtarget.
922  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
923  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
924  }
925 
926  /// \returns Maximum number of VGPRs that meets number of waves per execution
927  /// unit requirement for function \p MF, or number of VGPRs explicitly
928  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
929  ///
930  /// \returns Value that meets number of waves per execution unit requirement
931  /// if explicitly requested value cannot be converted to integer, violates
932  /// subtarget's specifications, or does not meet number of waves per execution
933  /// unit requirement.
934  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
935 
936  void getPostRAMutations(
937  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
938  const override;
939 
940  /// \returns Maximum number of work groups per compute unit supported by the
941  /// subtarget and limited by given \p FlatWorkGroupSize.
942  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
943  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
944  }
945 
946  /// \returns Minimum flat work group size supported by the subtarget.
947  unsigned getMinFlatWorkGroupSize() const override {
949  }
950 
951  /// \returns Maximum flat work group size supported by the subtarget.
952  unsigned getMaxFlatWorkGroupSize() const override {
954  }
955 
956  /// \returns Maximum number of waves per execution unit supported by the
957  /// subtarget and limited by given \p FlatWorkGroupSize.
958  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
959  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
960  }
961 
962  /// \returns Minimum number of waves per execution unit supported by the
963  /// subtarget.
964  unsigned getMinWavesPerEU() const override {
966  }
967 };
968 
969 class R600Subtarget final : public R600GenSubtargetInfo,
970  public AMDGPUSubtarget {
971 private:
972  R600InstrInfo InstrInfo;
973  R600FrameLowering FrameLowering;
974  bool FMA;
975  bool CaymanISA;
976  bool CFALUBug;
977  bool DX10Clamp;
978  bool HasVertexCache;
979  bool R600ALUInst;
980  bool FP64;
981  short TexVTXClauseSize;
982  Generation Gen;
983  R600TargetLowering TLInfo;
984  InstrItineraryData InstrItins;
985  SelectionDAGTargetInfo TSInfo;
986 
987 public:
988  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
989  const TargetMachine &TM);
990 
991  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
992 
993  const R600FrameLowering *getFrameLowering() const override {
994  return &FrameLowering;
995  }
996 
997  const R600TargetLowering *getTargetLowering() const override {
998  return &TLInfo;
999  }
1000 
1001  const R600RegisterInfo *getRegisterInfo() const override {
1002  return &InstrInfo.getRegisterInfo();
1003  }
1004 
1005  const InstrItineraryData *getInstrItineraryData() const override {
1006  return &InstrItins;
1007  }
1008 
1009  // Nothing implemented, just prevent crashes on use.
1011  return &TSInfo;
1012  }
1013 
1014  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1015 
1017  return Gen;
1018  }
1019 
1020  unsigned getStackAlignment() const {
1021  return 4;
1022  }
1023 
1024  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1025  StringRef GPU, StringRef FS);
1026 
1027  bool hasBFE() const {
1028  return (getGeneration() >= EVERGREEN);
1029  }
1030 
1031  bool hasBFI() const {
1032  return (getGeneration() >= EVERGREEN);
1033  }
1034 
1035  bool hasBCNT(unsigned Size) const {
1036  if (Size == 32)
1037  return (getGeneration() >= EVERGREEN);
1038 
1039  return false;
1040  }
1041 
1042  bool hasBORROW() const {
1043  return (getGeneration() >= EVERGREEN);
1044  }
1045 
1046  bool hasCARRY() const {
1047  return (getGeneration() >= EVERGREEN);
1048  }
1049 
1050  bool hasCaymanISA() const {
1051  return CaymanISA;
1052  }
1053 
1054  bool hasFFBL() const {
1055  return (getGeneration() >= EVERGREEN);
1056  }
1057 
1058  bool hasFFBH() const {
1059  return (getGeneration() >= EVERGREEN);
1060  }
1061 
1062  bool hasFMA() const { return FMA; }
1063 
1064  bool hasCFAluBug() const { return CFALUBug; }
1065 
1066  bool hasVertexCache() const { return HasVertexCache; }
1067 
1068  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1069 
1070  bool enableMachineScheduler() const override {
1071  return true;
1072  }
1073 
1074  bool enableSubRegLiveness() const override {
1075  return true;
1076  }
1077 
1078  /// \returns Maximum number of work groups per compute unit supported by the
1079  /// subtarget and limited by given \p FlatWorkGroupSize.
1080  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1081  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1082  }
1083 
1084  /// \returns Minimum flat work group size supported by the subtarget.
1085  unsigned getMinFlatWorkGroupSize() const override {
1087  }
1088 
1089  /// \returns Maximum flat work group size supported by the subtarget.
1090  unsigned getMaxFlatWorkGroupSize() const override {
1092  }
1093 
1094  /// \returns Maximum number of waves per execution unit supported by the
1095  /// subtarget and limited by given \p FlatWorkGroupSize.
1096  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1097  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1098  }
1099 
1100  /// \returns Minimum number of waves per execution unit supported by the
1101  /// subtarget.
1102  unsigned getMinWavesPerEU() const override {
1103  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1104  }
1105 };
1106 
1107 } // end namespace llvm
1108 
1109 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool enableIEEEBit(const MachineFunction &MF) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasSDWAOmod() const
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
bool hasApertureRegs() const
bool debuggerSupported() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool isMesaKernel(const Function &F) const
unsigned getMinFlatWorkGroupSize() const override
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:298
bool isPromoteAllocaEnabled() const
bool d16PreservesUnusedBits() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getSGPRAllocGranule() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasTrigReducedRange() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
virtual unsigned getMinWavesPerEU() const =0
F(f)
InstrItineraryData InstrItins
unsigned getMaxWavesPerEU() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasFastFMAF32() const
Generation getGeneration() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:154
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasFP64Denormals() const
Holds all the information related to register banks.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
bool hasIntClamp() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool enableDX10Clamp() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool debuggerInsertNops() const
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
bool isTrapHandlerEnabled() const
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasSMemRealTime() const
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasHalfRate64Ops() const
bool useFlatForGlobal() const
unsigned getAddressableNumSGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinWavesPerEU() const override
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
Itinerary data supplied by a subtarget to be used by a target.
bool hasAddNoCarry() const
const CallLowering * getCallLowering() const override
virtual unsigned getMinFlatWorkGroupSize() const =0
bool dumpCode() const
bool debuggerEmitPrologue() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
bool isCompute(CallingConv::ID cc)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMaxWavesPerCU() const
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
bool hasCFAluBug() const
unsigned getStackAlignment() const
bool hasFminFmaxLegacy() const
bool hasDLInsts() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasMovrel() const
unsigned MaxPrivateElementSize
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
const R600InstrInfo * getInstrInfo() const override
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:72
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasScalarAtomics() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI)
bool enableHugePrivateBuffer() const
bool enableSIScheduler() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
Definition: Function.h:213
bool hasMadMixInsts() const
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
unsigned getAlignmentForImplicitArgPtr() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasMed3_16() const
int getLDSBankCount() const
bool hasBCNT(unsigned Size) const
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI)
const InstructionSelector * getInstructionSelector() const override
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool enableSubRegLiveness() const override
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
R600 DAG Lowering interface definition.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(const Triple &TT)
unsigned getTotalNumVGPRs() const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:47
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getStackAlignment() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool hasSDWAScalar() const
const InstrItineraryData * getInstrItineraryData() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return whether most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:286
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasR128A16() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override