LLVM  8.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUCallLowering.h"
20 #include "R600FrameLowering.h"
21 #include "R600ISelLowering.h"
22 #include "R600InstrInfo.h"
23 #include "SIFrameLowering.h"
24 #include "SIISelLowering.h"
25 #include "SIInstrInfo.h"
26 #include "Utils/AMDGPUBaseInfo.h"
27 #include "llvm/ADT/Triple.h"
35 #include <cassert>
36 #include <cstdint>
37 #include <memory>
38 #include <utility>
39 
40 #define GET_SUBTARGETINFO_HEADER
41 #include "AMDGPUGenSubtargetInfo.inc"
42 #define GET_SUBTARGETINFO_HEADER
43 #include "R600GenSubtargetInfo.inc"
44 
45 namespace llvm {
46 
47 class StringRef;
48 
50 public:
51  enum Generation {
52  R600 = 0,
53  R700 = 1,
54  EVERGREEN = 2,
59  GFX9 = 7
60  };
61 
62 private:
63  Triple TargetTriple;
64 
65 protected:
70  bool HasSDWA;
72  bool HasMulI24;
73  bool HasMulU24;
79  unsigned WavefrontSize;
80 
81 public:
82  AMDGPUSubtarget(const Triple &TT);
83 
84  static const AMDGPUSubtarget &get(const MachineFunction &MF);
85  static const AMDGPUSubtarget &get(const TargetMachine &TM,
86  const Function &F);
87 
88  /// \returns Default flat work group size range for a calling convention.
89  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
90 
91  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
92  /// for function \p F, or minimum/maximum flat work group sizes explicitly
93  /// requested using "amdgpu-flat-work-group-size" attribute attached to
94  /// function \p F.
95  ///
96  /// \returns Subtarget's default values if explicitly requested values cannot
97  /// be converted to integer, or violate subtarget's specifications.
98  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
99 
100  /// \returns Subtarget's default pair of minimum/maximum number of waves per
101  /// execution unit for function \p F, or minimum/maximum number of waves per
102  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
103  /// attached to function \p F.
104  ///
105  /// \returns Subtarget's default values if explicitly requested values cannot
106  /// be converted to integer, violate subtarget's specifications, or are not
107  /// compatible with minimum/maximum number of waves limited by flat work group
108  /// size, register usage, and/or lds usage.
109  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
110 
111  /// Return the amount of LDS that can be used that will not restrict the
112  /// occupancy lower than WaveCount.
113  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
114  const Function &) const;
115 
116  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wave count
117  /// if the given LDS memory size is the only constraint.
118  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
119 
120  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
121 
122  bool isAmdHsaOS() const {
123  return TargetTriple.getOS() == Triple::AMDHSA;
124  }
125 
126  bool isAmdPalOS() const {
127  return TargetTriple.getOS() == Triple::AMDPAL;
128  }
129 
130  bool isMesa3DOS() const {
131  return TargetTriple.getOS() == Triple::Mesa3D;
132  }
133 
/// \returns true if the target OS is Mesa3D and the calling convention of
/// \p F is not classified as a shader by AMDGPU::isShader.
134  bool isMesaKernel(const Function &F) const {
135  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
136  }
137 
/// \returns true if the target OS is AMDHSA, or if \p F is a Mesa kernel as
/// determined by isMesaKernel.
138  bool isAmdHsaOrMesa(const Function &F) const {
139  return isAmdHsaOS() || isMesaKernel(F);
140  }
141 
142  bool has16BitInsts() const {
143  return Has16BitInsts;
144  }
145 
146  bool hasMadMixInsts() const {
147  return HasMadMixInsts;
148  }
149 
150  bool hasFP32Denormals() const {
151  return FP32Denormals;
152  }
153 
154  bool hasFPExceptions() const {
155  return FPExceptions;
156  }
157 
158  bool hasSDWA() const {
159  return HasSDWA;
160  }
161 
162  bool hasVOP3PInsts() const {
163  return HasVOP3PInsts;
164  }
165 
166  bool hasMulI24() const {
167  return HasMulI24;
168  }
169 
170  bool hasMulU24() const {
171  return HasMulU24;
172  }
173 
174  bool hasInv2PiInlineImm() const {
175  return HasInv2PiInlineImm;
176  }
177 
178  bool hasFminFmaxLegacy() const {
179  return HasFminFmaxLegacy;
180  }
181 
182  bool hasTrigReducedRange() const {
183  return HasTrigReducedRange;
184  }
185 
186  bool isPromoteAllocaEnabled() const {
187  return EnablePromoteAlloca;
188  }
189 
190  unsigned getWavefrontSize() const {
191  return WavefrontSize;
192  }
193 
194  int getLocalMemorySize() const {
195  return LocalMemorySize;
196  }
197 
/// \returns 8 when the target OS is AMDHSA, otherwise 4.
198  unsigned getAlignmentForImplicitArgPtr() const {
199  return isAmdHsaOS() ? 8 : 4;
200  }
201 
202  /// Returns the offset in bytes from the start of the input buffer
203  /// of the first explicit kernel argument.
204  unsigned getExplicitKernelArgOffset(const Function &F) const {
205  return isAmdHsaOrMesa(F) ? 0 : 36;
206  }
207 
208  /// \returns Maximum number of work groups per compute unit supported by the
209  /// subtarget and limited by given \p FlatWorkGroupSize.
210  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
211 
212  /// \returns Minimum flat work group size supported by the subtarget.
213  virtual unsigned getMinFlatWorkGroupSize() const = 0;
214 
215  /// \returns Maximum flat work group size supported by the subtarget.
216  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
217 
218  /// \returns Maximum number of waves per execution unit supported by the
219  /// subtarget and limited by given \p FlatWorkGroupSize.
220  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
221 
222  /// \returns Minimum number of waves per execution unit supported by the
223  /// subtarget.
224  virtual unsigned getMinWavesPerEU() const = 0;
225 
226  unsigned getMaxWavesPerEU() const { return 10; }
227 
228  /// Creates value range metadata on a workitemid.* intrinsic call or load.
229  bool makeLIDRangeMetadata(Instruction *I) const;
230 
231  /// \returns Number of bytes of arguments that are passed to a shader or
232  /// kernel in addition to the explicit ones declared for the function.
233  unsigned getImplicitArgNumBytes(const Function &F) const {
234  if (isMesaKernel(F))
235  return 16;
236  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
237  }
238  uint64_t getExplicitKernArgSize(const Function &F,
239  unsigned &MaxAlign) const;
240  unsigned getKernArgSegmentSize(const Function &F,
241  unsigned &MaxAlign) const;
242 
243  virtual ~AMDGPUSubtarget() {}
244 };
245 
247  public AMDGPUSubtarget {
248 public:
249  enum {
267  };
268 
270  TrapHandlerAbiNone = 0,
271  TrapHandlerAbiHsa = 1
272  };
273 
274  enum TrapID {
275  TrapIDHardwareReserved = 0,
276  TrapIDHSADebugTrap = 1,
277  TrapIDLLVMTrap = 2,
278  TrapIDLLVMDebugTrap = 3,
279  TrapIDDebugBreakpoint = 7,
280  TrapIDDebugReserved8 = 8,
281  TrapIDDebugReservedFE = 0xfe,
282  TrapIDDebugReservedFF = 0xff
283  };
284 
286  LLVMTrapHandlerRegValue = 1
287  };
288 
289 private:
290  /// GlobalISel related APIs.
291  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
292  std::unique_ptr<InstructionSelector> InstSelector;
293  std::unique_ptr<LegalizerInfo> Legalizer;
294  std::unique_ptr<RegisterBankInfo> RegBankInfo;
295 
296 protected:
297  // Basic subtarget description.
299  unsigned Gen;
300  unsigned IsaVersion;
304 
305  // Possibly statically set by tablegen, but may want to be overridden.
308 
309  // Dynamically set bits that enable features.
311  bool DX10Clamp;
322 
323  // Used as options.
329  bool DumpCode;
330 
331  // Subtarget properties statically set by tablegen
332  bool FP64;
333  bool FMA;
334  bool MIMG_R128;
335  bool IsGCN;
337  bool CIInsts;
338  bool VIInsts;
339  bool GFX9Insts;
344  bool HasMovrel;
353  bool HasDPP;
364  bool CaymanISA;
365  bool CFALUBug;
369 
370  // Dummy feature to use for assembler in tablegen.
372 
374 private:
375  SIInstrInfo InstrInfo;
376  SITargetLowering TLInfo;
377  SIFrameLowering FrameLowering;
378 
379 public:
380  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
381  const GCNTargetMachine &TM);
382  ~GCNSubtarget() override;
383 
384  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
385  StringRef GPU, StringRef FS);
386 
387  const SIInstrInfo *getInstrInfo() const override {
388  return &InstrInfo;
389  }
390 
391  const SIFrameLowering *getFrameLowering() const override {
392  return &FrameLowering;
393  }
394 
395  const SITargetLowering *getTargetLowering() const override {
396  return &TLInfo;
397  }
398 
399  const SIRegisterInfo *getRegisterInfo() const override {
400  return &InstrInfo.getRegisterInfo();
401  }
402 
403  const CallLowering *getCallLowering() const override {
404  return CallLoweringInfo.get();
405  }
406 
407  const InstructionSelector *getInstructionSelector() const override {
408  return InstSelector.get();
409  }
410 
411  const LegalizerInfo *getLegalizerInfo() const override {
412  return Legalizer.get();
413  }
414 
415  const RegisterBankInfo *getRegBankInfo() const override {
416  return RegBankInfo.get();
417  }
418 
419  // Nothing implemented, just prevent crashes on use.
420  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
421  return &TSInfo;
422  }
423 
424  const InstrItineraryData *getInstrItineraryData() const override {
425  return &InstrItins;
426  }
427 
428  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
429 
431  return (Generation)Gen;
432  }
433 
434  unsigned getWavefrontSizeLog2() const {
435  return Log2_32(WavefrontSize);
436  }
437 
438  int getLDSBankCount() const {
439  return LDSBankCount;
440  }
441 
442  unsigned getMaxPrivateElementSize() const {
443  return MaxPrivateElementSize;
444  }
445 
446  bool hasIntClamp() const {
447  return HasIntClamp;
448  }
449 
450  bool hasFP64() const {
451  return FP64;
452  }
453 
454  bool hasMIMG_R128() const {
455  return MIMG_R128;
456  }
457 
458  bool hasHWFP64() const {
459  return FP64;
460  }
461 
462  bool hasFastFMAF32() const {
463  return FastFMAF32;
464  }
465 
466  bool hasHalfRate64Ops() const {
467  return HalfRate64Ops;
468  }
469 
/// \returns true for generations earlier than VOLCANIC_ISLANDS.
470  bool hasAddr64() const {
471  return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
472  }
473 
474  bool hasBFE() const {
475  return true;
476  }
477 
478  bool hasBFI() const {
479  return true;
480  }
481 
482  bool hasBFM() const {
483  return hasBFE();
484  }
485 
486  bool hasBCNT(unsigned Size) const {
487  return true;
488  }
489 
490  bool hasFFBL() const {
491  return true;
492  }
493 
494  bool hasFFBH() const {
495  return true;
496  }
497 
498  bool hasMed3_16() const {
499  return getGeneration() >= AMDGPUSubtarget::GFX9;
500  }
501 
502  bool hasMin3Max3_16() const {
503  return getGeneration() >= AMDGPUSubtarget::GFX9;
504  }
505 
506  bool hasFmaMixInsts() const {
507  return HasFmaMixInsts;
508  }
509 
510  bool hasCARRY() const {
511  return true;
512  }
513 
514  bool hasFMA() const {
515  return FMA;
516  }
517 
518  bool hasSwap() const {
519  return GFX9Insts;
520  }
521 
523  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
524  }
525 
526  bool enableHugePrivateBuffer() const {
527  return EnableHugePrivateBuffer;
528  }
529 
531  return EnableUnsafeDSOffsetFolding;
532  }
533 
534  bool dumpCode() const {
535  return DumpCode;
536  }
537 
538  /// Return the amount of LDS that can be used that will not restrict the
539  /// occupancy lower than WaveCount.
540  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
541  const Function &) const;
542 
543  bool hasFP16Denormals() const {
544  return FP64FP16Denormals;
545  }
546 
547  bool hasFP64Denormals() const {
548  return FP64FP16Denormals;
549  }
550 
552  return getGeneration() >= AMDGPUSubtarget::GFX9;
553  }
554 
555  bool enableDX10Clamp() const {
556  return DX10Clamp;
557  }
558 
559  bool enableIEEEBit(const MachineFunction &MF) const {
561  }
562 
563  bool useFlatForGlobal() const {
564  return FlatForGlobal;
565  }
566 
567  /// \returns If target supports ds_read/write_b128 and user enables generation
568  /// of ds_read/write_b128.
569  bool useDS128() const {
570  return CIInsts && EnableDS128;
571  }
572 
573  /// \returns If MUBUF instructions always perform range checking, even for
574  /// buffer resources used for private memory access.
576  return getGeneration() < AMDGPUSubtarget::GFX9;
577  }
578 
580  return AutoWaitcntBeforeBarrier;
581  }
582 
/// \returns the value of CodeObjectV3 when the target OS is AMDHSA, and false
/// on every other OS.
583  bool hasCodeObjectV3() const {
584  // FIXME: Need to add code object v3 support for mesa and pal.
585  return isAmdHsaOS() ? CodeObjectV3 : false;
586  }
587 
589  return UnalignedBufferAccess;
590  }
591 
593  return UnalignedScratchAccess;
594  }
595 
596  bool hasApertureRegs() const {
597  return HasApertureRegs;
598  }
599 
600  bool isTrapHandlerEnabled() const {
601  return TrapHandler;
602  }
603 
604  bool isXNACKEnabled() const {
605  return EnableXNACK;
606  }
607 
608  bool hasFlatAddressSpace() const {
609  return FlatAddressSpace;
610  }
611 
612  bool hasFlatInstOffsets() const {
613  return FlatInstOffsets;
614  }
615 
616  bool hasFlatGlobalInsts() const {
617  return FlatGlobalInsts;
618  }
619 
620  bool hasFlatScratchInsts() const {
621  return FlatScratchInsts;
622  }
623 
625  return getGeneration() > GFX9;
626  }
627 
628  bool hasD16LoadStore() const {
629  return getGeneration() >= GFX9;
630  }
631 
632  /// Return true if most LDS instructions have an m0 use that requires m0 to
633  /// be initialized.
634  bool ldsRequiresM0Init() const {
635  return getGeneration() < GFX9;
636  }
637 
638  bool hasAddNoCarry() const {
639  return AddNoCarryInsts;
640  }
641 
642  bool hasUnpackedD16VMem() const {
643  return HasUnpackedD16VMem;
644  }
645 
646  // Covers VS/PS/CS graphics shaders
647  bool isMesaGfxShader(const Function &F) const {
648  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
649  }
650 
651  bool hasMad64_32() const {
652  return getGeneration() >= SEA_ISLANDS;
653  }
654 
655  bool hasSDWAOmod() const {
656  return HasSDWAOmod;
657  }
658 
659  bool hasSDWAScalar() const {
660  return HasSDWAScalar;
661  }
662 
663  bool hasSDWASdst() const {
664  return HasSDWASdst;
665  }
666 
667  bool hasSDWAMac() const {
668  return HasSDWAMac;
669  }
670 
671  bool hasSDWAOutModsVOPC() const {
672  return HasSDWAOutModsVOPC;
673  }
674 
676  return getGeneration() < SEA_ISLANDS;
677  }
678 
679  bool hasDLInsts() const {
680  return HasDLInsts;
681  }
682 
683  bool isSRAMECCEnabled() const {
684  return EnableSRAMECC;
685  }
686 
687  // Scratch is allocated in 256 dword per wave blocks for the entire
688  // wavefront. When viewed from the perspective of an arbitrary workitem, this
689  // is 4-byte aligned.
690  //
691  // Only 4-byte alignment is really needed to access anything. Transformations
692  // on the pointer value itself may rely on the alignment / known low bits of
693  // the pointer. Set this to something above the minimum to avoid needing
694  // dynamic realignment in common cases.
695  unsigned getStackAlignment() const {
696  return 16;
697  }
698 
699  bool enableMachineScheduler() const override {
700  return true;
701  }
702 
703  bool enableSubRegLiveness() const override {
704  return true;
705  }
706 
707  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
709 
710  /// \returns Number of execution units per compute unit supported by the
711  /// subtarget.
712  unsigned getEUsPerCU() const {
713  return AMDGPU::IsaInfo::getEUsPerCU(this);
714  }
715 
716  /// \returns Maximum number of waves per compute unit supported by the
717  /// subtarget without any kind of limitation.
718  unsigned getMaxWavesPerCU() const {
720  }
721 
722  /// \returns Maximum number of waves per compute unit supported by the
723  /// subtarget and limited by given \p FlatWorkGroupSize.
724  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
725  return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
726  }
727 
728  /// \returns Maximum number of waves per execution unit supported by the
729  /// subtarget without any kind of limitation.
730  unsigned getMaxWavesPerEU() const {
732  }
733 
734  /// \returns Number of waves per work group supported by the subtarget and
735  /// limited by given \p FlatWorkGroupSize.
736  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
737  return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
738  }
739 
740  // static wrappers
741  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
742 
743  // XXX - Why is this here if it isn't in the default pass set?
744  bool enableEarlyIfConversion() const override {
745  return true;
746  }
747 
748  void overrideSchedPolicy(MachineSchedPolicy &Policy,
749  unsigned NumRegionInstrs) const override;
750 
751  unsigned getMaxNumUserSGPRs() const {
752  return 16;
753  }
754 
755  bool hasSMemRealTime() const {
756  return HasSMemRealTime;
757  }
758 
759  bool hasMovrel() const {
760  return HasMovrel;
761  }
762 
763  bool hasVGPRIndexMode() const {
764  return HasVGPRIndexMode;
765  }
766 
/// \returns true if the subtarget has no movrel support, or if \p UserEnable
/// is set and the subtarget has VGPR index mode.
767  bool useVGPRIndexMode(bool UserEnable) const {
768  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
769  }
770 
771  bool hasScalarCompareEq64() const {
772  return getGeneration() >= VOLCANIC_ISLANDS;
773  }
774 
775  bool hasScalarStores() const {
776  return HasScalarStores;
777  }
778 
779  bool hasScalarAtomics() const {
780  return HasScalarAtomics;
781  }
782 
783 
784  bool hasDPP() const {
785  return HasDPP;
786  }
787 
788  bool hasR128A16() const {
789  return HasR128A16;
790  }
791 
792  bool enableSIScheduler() const {
793  return EnableSIScheduler;
794  }
795 
/// \returns true only when both debuggerInsertNops() and
/// debuggerEmitPrologue() return true.
796  bool debuggerSupported() const {
797  return debuggerInsertNops() && debuggerEmitPrologue();
798  }
799 
800  bool debuggerInsertNops() const {
801  return DebuggerInsertNops;
802  }
803 
804  bool debuggerEmitPrologue() const {
805  return DebuggerEmitPrologue;
806  }
807 
808  bool loadStoreOptEnabled() const {
809  return EnableLoadStoreOpt;
810  }
811 
812  bool hasSGPRInitBug() const {
813  return SGPRInitBug;
814  }
815 
816  bool has12DWordStoreHazard() const {
817  return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
818  }
819 
820  bool hasSMovFedHazard() const {
821  return getGeneration() >= AMDGPUSubtarget::GFX9;
822  }
823 
825  return getGeneration() >= AMDGPUSubtarget::GFX9;
826  }
827 
828  bool hasReadM0SendMsgHazard() const {
829  return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
830  }
831 
832  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
833  /// SGPRs
834  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
835 
836  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
837  /// VGPRs
838  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
839 
840  /// \returns true if the flat_scratch register should be initialized with the
841  /// pointer to the wave's scratch memory rather than a size and offset.
842  bool flatScratchIsPointer() const {
843  return getGeneration() >= AMDGPUSubtarget::GFX9;
844  }
845 
846  /// \returns true if the machine has merged shaders in which s0-s7 are
847  /// reserved by the hardware and user SGPRs start at s8
848  bool hasMergedShaders() const {
849  return getGeneration() >= GFX9;
850  }
851 
852  /// \returns SGPR allocation granularity supported by the subtarget.
853  unsigned getSGPRAllocGranule() const {
855  }
856 
857  /// \returns SGPR encoding granularity supported by the subtarget.
858  unsigned getSGPREncodingGranule() const {
860  }
861 
862  /// \returns Total number of SGPRs supported by the subtarget.
863  unsigned getTotalNumSGPRs() const {
865  }
866 
867  /// \returns Addressable number of SGPRs supported by the subtarget.
868  unsigned getAddressableNumSGPRs() const {
870  }
871 
872  /// \returns Minimum number of SGPRs that meets the given number of waves per
873  /// execution unit requirement supported by the subtarget.
874  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
875  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
876  }
877 
878  /// \returns Maximum number of SGPRs that meets the given number of waves per
879  /// execution unit requirement supported by the subtarget.
880  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
881  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
882  }
883 
884  /// \returns Reserved number of SGPRs for given function \p MF.
885  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
886 
887  /// \returns Maximum number of SGPRs that meets number of waves per execution
888  /// unit requirement for function \p MF, or number of SGPRs explicitly
889  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
890  ///
891  /// \returns Value that meets number of waves per execution unit requirement
892  /// if explicitly requested value cannot be converted to integer, violates
893  /// subtarget's specifications, or does not meet number of waves per execution
894  /// unit requirement.
895  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
896 
897  /// \returns VGPR allocation granularity supported by the subtarget.
898  unsigned getVGPRAllocGranule() const {
900  }
901 
902  /// \returns VGPR encoding granularity supported by the subtarget.
903  unsigned getVGPREncodingGranule() const {
905  }
906 
907  /// \returns Total number of VGPRs supported by the subtarget.
908  unsigned getTotalNumVGPRs() const {
910  }
911 
912  /// \returns Addressable number of VGPRs supported by the subtarget.
913  unsigned getAddressableNumVGPRs() const {
915  }
916 
917  /// \returns Minimum number of VGPRs that meets given number of waves per
918  /// execution unit requirement supported by the subtarget.
919  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
920  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
921  }
922 
923  /// \returns Maximum number of VGPRs that meets given number of waves per
924  /// execution unit requirement supported by the subtarget.
925  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
926  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
927  }
928 
929  /// \returns Maximum number of VGPRs that meets number of waves per execution
930  /// unit requirement for function \p MF, or number of VGPRs explicitly
931  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
932  ///
933  /// \returns Value that meets number of waves per execution unit requirement
934  /// if explicitly requested value cannot be converted to integer, violates
935  /// subtarget's specifications, or does not meet number of waves per execution
936  /// unit requirement.
937  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
938 
939  void getPostRAMutations(
940  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
941  const override;
942 
943  /// \returns Maximum number of work groups per compute unit supported by the
944  /// subtarget and limited by given \p FlatWorkGroupSize.
945  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
946  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
947  }
948 
949  /// \returns Minimum flat work group size supported by the subtarget.
950  unsigned getMinFlatWorkGroupSize() const override {
952  }
953 
954  /// \returns Maximum flat work group size supported by the subtarget.
955  unsigned getMaxFlatWorkGroupSize() const override {
957  }
958 
959  /// \returns Maximum number of waves per execution unit supported by the
960  /// subtarget and limited by given \p FlatWorkGroupSize.
961  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
962  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
963  }
964 
965  /// \returns Minimum number of waves per execution unit supported by the
966  /// subtarget.
967  unsigned getMinWavesPerEU() const override {
969  }
970 };
971 
972 class R600Subtarget final : public R600GenSubtargetInfo,
973  public AMDGPUSubtarget {
974 private:
975  R600InstrInfo InstrInfo;
976  R600FrameLowering FrameLowering;
977  bool FMA;
978  bool CaymanISA;
979  bool CFALUBug;
980  bool DX10Clamp;
981  bool HasVertexCache;
982  bool R600ALUInst;
983  bool FP64;
984  short TexVTXClauseSize;
985  Generation Gen;
986  R600TargetLowering TLInfo;
987  InstrItineraryData InstrItins;
988  SelectionDAGTargetInfo TSInfo;
989 
990 public:
991  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
992  const TargetMachine &TM);
993 
994  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
995 
996  const R600FrameLowering *getFrameLowering() const override {
997  return &FrameLowering;
998  }
999 
1000  const R600TargetLowering *getTargetLowering() const override {
1001  return &TLInfo;
1002  }
1003 
1004  const R600RegisterInfo *getRegisterInfo() const override {
1005  return &InstrInfo.getRegisterInfo();
1006  }
1007 
1008  const InstrItineraryData *getInstrItineraryData() const override {
1009  return &InstrItins;
1010  }
1011 
1012  // Nothing implemented, just prevent crashes on use.
1014  return &TSInfo;
1015  }
1016 
1017  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1018 
1020  return Gen;
1021  }
1022 
1023  unsigned getStackAlignment() const {
1024  return 4;
1025  }
1026 
1027  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1028  StringRef GPU, StringRef FS);
1029 
1030  bool hasBFE() const {
1031  return (getGeneration() >= EVERGREEN);
1032  }
1033 
1034  bool hasBFI() const {
1035  return (getGeneration() >= EVERGREEN);
1036  }
1037 
/// \returns true only for a \p Size of 32 on generation EVERGREEN or later;
/// false for every other size.
1038  bool hasBCNT(unsigned Size) const {
1039  if (Size == 32)
1040  return (getGeneration() >= EVERGREEN);
1041 
1042  return false;
1043  }
1044 
1045  bool hasBORROW() const {
1046  return (getGeneration() >= EVERGREEN);
1047  }
1048 
1049  bool hasCARRY() const {
1050  return (getGeneration() >= EVERGREEN);
1051  }
1052 
1053  bool hasCaymanISA() const {
1054  return CaymanISA;
1055  }
1056 
1057  bool hasFFBL() const {
1058  return (getGeneration() >= EVERGREEN);
1059  }
1060 
1061  bool hasFFBH() const {
1062  return (getGeneration() >= EVERGREEN);
1063  }
1064 
1065  bool hasFMA() const { return FMA; }
1066 
1067  bool hasCFAluBug() const { return CFALUBug; }
1068 
1069  bool hasVertexCache() const { return HasVertexCache; }
1070 
1071  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1072 
1073  bool enableMachineScheduler() const override {
1074  return true;
1075  }
1076 
1077  bool enableSubRegLiveness() const override {
1078  return true;
1079  }
1080 
1081  /// \returns Maximum number of work groups per compute unit supported by the
1082  /// subtarget and limited by given \p FlatWorkGroupSize.
1083  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1084  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1085  }
1086 
1087  /// \returns Minimum flat work group size supported by the subtarget.
1088  unsigned getMinFlatWorkGroupSize() const override {
1090  }
1091 
1092  /// \returns Maximum flat work group size supported by the subtarget.
1093  unsigned getMaxFlatWorkGroupSize() const override {
1095  }
1096 
1097  /// \returns Maximum number of waves per execution unit supported by the
1098  /// subtarget and limited by given \p FlatWorkGroupSize.
1099  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1100  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1101  }
1102 
1103  /// \returns Minimum number of waves per execution unit supported by the
1104  /// subtarget.
1105  unsigned getMinWavesPerEU() const override {
1106  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1107  }
1108 };
1109 
1110 } // end namespace llvm
1111 
1112 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool enableIEEEBit(const MachineFunction &MF) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasSDWAOmod() const
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
bool hasApertureRegs() const
bool debuggerSupported() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool isMesaKernel(const Function &F) const
unsigned getMinFlatWorkGroupSize() const override
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:299
bool isPromoteAllocaEnabled() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getSGPRAllocGranule() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasTrigReducedRange() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
virtual unsigned getMinWavesPerEU() const =0
F(f)
InstrItineraryData InstrItins
unsigned getMaxWavesPerEU() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasFastFMAF32() const
Generation getGeneration() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:165
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasFP64Denormals() const
Holds all the information related to register banks.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
bool hasIntClamp() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool enableDX10Clamp() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool debuggerInsertNops() const
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
bool isTrapHandlerEnabled() const
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasSMemRealTime() const
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasHalfRate64Ops() const
bool useFlatForGlobal() const
unsigned getAddressableNumSGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinWavesPerEU() const override
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
Itinerary data supplied by a subtarget to be used by a target.
bool hasAddNoCarry() const
const CallLowering * getCallLowering() const override
virtual unsigned getMinFlatWorkGroupSize() const =0
bool dumpCode() const
bool debuggerEmitPrologue() const
bool isSRAMECCEnabled() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
bool isCompute(CallingConv::ID cc)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMaxWavesPerCU() const
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
bool hasCFAluBug() const
unsigned getStackAlignment() const
bool hasFminFmaxLegacy() const
bool hasDLInsts() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasSwap() const
bool hasMovrel() const
unsigned MaxPrivateElementSize
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
const R600InstrInfo * getInstrInfo() const override
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:72
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasScalarAtomics() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI)
bool enableHugePrivateBuffer() const
bool enableSIScheduler() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
Definition: Function.h:213
bool hasMadMixInsts() const
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
unsigned getAlignmentForImplicitArgPtr() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasMed3_16() const
int getLDSBankCount() const
bool hasBCNT(unsigned Size) const
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI)
const InstructionSelector * getInstructionSelector() const override
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool enableSubRegLiveness() const override
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
R600 DAG Lowering interface definition.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(const Triple &TT)
unsigned getTotalNumVGPRs() const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:47
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getStackAlignment() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool hasSDWAScalar() const
const InstrItineraryData * getInstrItineraryData() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasR128A16() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override