LLVM  8.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUCallLowering.h"
20 #include "R600FrameLowering.h"
21 #include "R600ISelLowering.h"
22 #include "R600InstrInfo.h"
23 #include "SIFrameLowering.h"
24 #include "SIISelLowering.h"
25 #include "SIInstrInfo.h"
26 #include "Utils/AMDGPUBaseInfo.h"
27 #include "llvm/ADT/Triple.h"
35 #include <cassert>
36 #include <cstdint>
37 #include <memory>
38 #include <utility>
39 
40 #define GET_SUBTARGETINFO_HEADER
41 #include "AMDGPUGenSubtargetInfo.inc"
42 #define GET_SUBTARGETINFO_HEADER
43 #include "R600GenSubtargetInfo.inc"
44 
45 namespace llvm {
46 
47 class StringRef;
48 
50 public:
51  enum Generation {
52  R600 = 0,
53  R700 = 1,
54  EVERGREEN = 2,
59  GFX9 = 7
60  };
61 
62 private:
63  Triple TargetTriple;
64 
65 protected:
71  bool HasSDWA;
73  bool HasMulI24;
74  bool HasMulU24;
79  unsigned WavefrontSize;
80 
81 public:
82  AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
83 
84  static const AMDGPUSubtarget &get(const MachineFunction &MF);
85  static const AMDGPUSubtarget &get(const TargetMachine &TM,
86  const Function &F);
87 
88  /// \returns Default range flat work group size for a calling convention.
89  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
90 
91  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
92  /// for function \p F, or minimum/maximum flat work group sizes explicitly
93  /// requested using "amdgpu-flat-work-group-size" attribute attached to
94  /// function \p F.
95  ///
96  /// \returns Subtarget's default values if explicitly requested values cannot
97  /// be converted to integer, or violate subtarget's specifications.
98  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
99 
100  /// \returns Subtarget's default pair of minimum/maximum number of waves per
101  /// execution unit for function \p F, or minimum/maximum number of waves per
102  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
103  /// attached to function \p F.
104  ///
105  /// \returns Subtarget's default values if explicitly requested values cannot
106  /// be converted to integer, violate subtarget's specifications, or are not
107  /// compatible with minimum/maximum number of waves limited by flat work group
108  /// size, register usage, and/or lds usage.
109  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
110 
111  /// Return the amount of LDS that can be used that will not restrict the
112  /// occupancy lower than WaveCount.
113  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
114  const Function &) const;
115 
116  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wave count
117  /// if the given LDS memory size is the only constraint.
118  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
119 
120  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
121 
122  bool isAmdHsaOS() const {
123  return TargetTriple.getOS() == Triple::AMDHSA;
124  }
125 
126  bool isAmdPalOS() const {
127  return TargetTriple.getOS() == Triple::AMDPAL;
128  }
129 
130  bool isMesa3DOS() const {
131  return TargetTriple.getOS() == Triple::Mesa3D;
132  }
133 
134  bool isMesaKernel(const Function &F) const {
135  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
136  }
137 
138  bool isAmdCodeObjectV2(const Function &F) const {
139  return isAmdHsaOS() || isMesaKernel(F);
140  }
141 
142  bool has16BitInsts() const {
143  return Has16BitInsts;
144  }
145 
146  bool hasMadMixInsts() const {
147  return HasMadMixInsts;
148  }
149 
150  bool hasFP32Denormals() const {
151  return FP32Denormals;
152  }
153 
154  bool hasFPExceptions() const {
155  return FPExceptions;
156  }
157 
158  bool hasSDWA() const {
159  return HasSDWA;
160  }
161 
162  bool hasVOP3PInsts() const {
163  return HasVOP3PInsts;
164  }
165 
166  bool hasMulI24() const {
167  return HasMulI24;
168  }
169 
170  bool hasMulU24() const {
171  return HasMulU24;
172  }
173 
174  bool hasInv2PiInlineImm() const {
175  return HasInv2PiInlineImm;
176  }
177 
178  bool hasFminFmaxLegacy() const {
179  return HasFminFmaxLegacy;
180  }
181 
182  bool isPromoteAllocaEnabled() const {
183  return EnablePromoteAlloca;
184  }
185 
186  unsigned getWavefrontSize() const {
187  return WavefrontSize;
188  }
189 
190  int getLocalMemorySize() const {
191  return LocalMemorySize;
192  }
193 
194  unsigned getAlignmentForImplicitArgPtr() const {
195  return isAmdHsaOS() ? 8 : 4;
196  }
197 
198  /// Returns the offset in bytes from the start of the input buffer
199  /// of the first explicit kernel argument.
200  unsigned getExplicitKernelArgOffset(const Function &F) const {
201  return isAmdCodeObjectV2(F) ? 0 : 36;
202  }
203 
204  /// \returns Maximum number of work groups per compute unit supported by the
205  /// subtarget and limited by given \p FlatWorkGroupSize.
206  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
207  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
208  FlatWorkGroupSize);
209  }
210 
211  /// \returns Minimum flat work group size supported by the subtarget.
212  unsigned getMinFlatWorkGroupSize() const {
213  return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
214  }
215 
216  /// \returns Maximum flat work group size supported by the subtarget.
217  unsigned getMaxFlatWorkGroupSize() const {
218  return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
219  }
220 
221  /// \returns Maximum number of waves per execution unit supported by the
222  /// subtarget and limited by given \p FlatWorkGroupSize.
223  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
224  return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
225  FlatWorkGroupSize);
226  }
227 
228  /// \returns Minimum number of waves per execution unit supported by the
229  /// subtarget.
230  unsigned getMinWavesPerEU() const {
231  return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
232  }
233 
234  unsigned getMaxWavesPerEU() const { return 10; }
235 
236  /// Creates value range metadata on a workitemid.* intrinsic call or load.
237  bool makeLIDRangeMetadata(Instruction *I) const;
238 
239  /// \returns Number of bytes of arguments that are passed to a shader or
240  /// kernel in addition to the explicit ones declared for the function.
241  unsigned getImplicitArgNumBytes(const Function &F) const {
242  if (isMesaKernel(F))
243  return 16;
244  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
245  }
246  uint64_t getExplicitKernArgSize(const Function &F,
247  unsigned &MaxAlign) const;
248  unsigned getKernArgSegmentSize(const Function &F,
249  unsigned &MaxAlign) const;
250 
251  virtual ~AMDGPUSubtarget() {}
252 };
253 
255  public AMDGPUSubtarget {
256 public:
257  enum {
274  };
275 
277  TrapHandlerAbiNone = 0,
278  TrapHandlerAbiHsa = 1
279  };
280 
281  enum TrapID {
282  TrapIDHardwareReserved = 0,
283  TrapIDHSADebugTrap = 1,
284  TrapIDLLVMTrap = 2,
285  TrapIDLLVMDebugTrap = 3,
286  TrapIDDebugBreakpoint = 7,
287  TrapIDDebugReserved8 = 8,
288  TrapIDDebugReservedFE = 0xfe,
289  TrapIDDebugReservedFF = 0xff
290  };
291 
293  LLVMTrapHandlerRegValue = 1
294  };
295 
296 private:
297  /// GlobalISel related APIs.
298  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
299  std::unique_ptr<InstructionSelector> InstSelector;
300  std::unique_ptr<LegalizerInfo> Legalizer;
301  std::unique_ptr<RegisterBankInfo> RegBankInfo;
302 
303 protected:
304  // Basic subtarget description.
306  unsigned Gen;
307  unsigned IsaVersion;
310 
311  // Possibly statically set by tablegen, but may want to be overridden.
314 
315  // Dynamically set bits that enable features.
317  bool DX10Clamp;
328 
329  // Used as options.
336  bool DumpCode;
337 
338  // Subtarget properties statically set by tablegen
339  bool FP64;
340  bool FMA;
341  bool MIMG_R128;
342  bool IsGCN;
344  bool CIInsts;
345  bool VIInsts;
346  bool GFX9Insts;
351  bool HasMovrel;
360  bool HasDPP;
370  bool CaymanISA;
371  bool CFALUBug;
375 
376  // Dummy feature to use for assembler in tablegen.
378 
381 private:
382  SIInstrInfo InstrInfo;
383  SITargetLowering TLInfo;
384  SIFrameLowering FrameLowering;
385 
386 public:
387  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
388  const GCNTargetMachine &TM);
389  ~GCNSubtarget() override;
390 
391  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
392  StringRef GPU, StringRef FS);
393 
394  const SIInstrInfo *getInstrInfo() const override {
395  return &InstrInfo;
396  }
397 
398  const SIFrameLowering *getFrameLowering() const override {
399  return &FrameLowering;
400  }
401 
402  const SITargetLowering *getTargetLowering() const override {
403  return &TLInfo;
404  }
405 
406  const SIRegisterInfo *getRegisterInfo() const override {
407  return &InstrInfo.getRegisterInfo();
408  }
409 
410  const CallLowering *getCallLowering() const override {
411  return CallLoweringInfo.get();
412  }
413 
414  const InstructionSelector *getInstructionSelector() const override {
415  return InstSelector.get();
416  }
417 
418  const LegalizerInfo *getLegalizerInfo() const override {
419  return Legalizer.get();
420  }
421 
422  const RegisterBankInfo *getRegBankInfo() const override {
423  return RegBankInfo.get();
424  }
425 
426  // Nothing implemented, just prevent crashes on use.
427  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
428  return &TSInfo;
429  }
430 
431  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
432 
434  return (Generation)Gen;
435  }
436 
437  unsigned getWavefrontSizeLog2() const {
438  return Log2_32(WavefrontSize);
439  }
440 
441  int getLDSBankCount() const {
442  return LDSBankCount;
443  }
444 
445  unsigned getMaxPrivateElementSize() const {
446  return MaxPrivateElementSize;
447  }
448 
450  return AS;
451  }
452 
453  bool hasIntClamp() const {
454  return HasIntClamp;
455  }
456 
457  bool hasFP64() const {
458  return FP64;
459  }
460 
461  bool hasMIMG_R128() const {
462  return MIMG_R128;
463  }
464 
465  bool hasHWFP64() const {
466  return FP64;
467  }
468 
469  bool hasFastFMAF32() const {
470  return FastFMAF32;
471  }
472 
473  bool hasHalfRate64Ops() const {
474  return HalfRate64Ops;
475  }
476 
477  bool hasAddr64() const {
478  return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
479  }
480 
481  bool hasBFE() const {
482  return true;
483  }
484 
485  bool hasBFI() const {
486  return true;
487  }
488 
489  bool hasBFM() const {
490  return hasBFE();
491  }
492 
493  bool hasBCNT(unsigned Size) const {
494  return true;
495  }
496 
497  bool hasFFBL() const {
498  return true;
499  }
500 
501  bool hasFFBH() const {
502  return true;
503  }
504 
505  bool hasMed3_16() const {
506  return getGeneration() >= AMDGPUSubtarget::GFX9;
507  }
508 
509  bool hasMin3Max3_16() const {
510  return getGeneration() >= AMDGPUSubtarget::GFX9;
511  }
512 
513  bool hasFmaMixInsts() const {
514  return HasFmaMixInsts;
515  }
516 
517  bool hasCARRY() const {
518  return true;
519  }
520 
521  bool hasFMA() const {
522  return FMA;
523  }
524 
526  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
527  }
528 
529  bool enableHugePrivateBuffer() const {
530  return EnableHugePrivateBuffer;
531  }
532 
534  return EnableUnsafeDSOffsetFolding;
535  }
536 
537  bool dumpCode() const {
538  return DumpCode;
539  }
540 
541  /// Return the amount of LDS that can be used that will not restrict the
542  /// occupancy lower than WaveCount.
543  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
544  const Function &) const;
545 
546  bool hasFP16Denormals() const {
547  return FP64FP16Denormals;
548  }
549 
550  bool hasFP64Denormals() const {
551  return FP64FP16Denormals;
552  }
553 
555  return getGeneration() >= AMDGPUSubtarget::GFX9;
556  }
557 
558  bool enableDX10Clamp() const {
559  return DX10Clamp;
560  }
561 
562  bool enableIEEEBit(const MachineFunction &MF) const {
564  }
565 
566  bool useFlatForGlobal() const {
567  return FlatForGlobal;
568  }
569 
570  /// \returns If target supports ds_read/write_b128 and user enables generation
571  /// of ds_read/write_b128.
572  bool useDS128() const {
573  return CIInsts && EnableDS128;
574  }
575 
576  /// \returns If MUBUF instructions always perform range checking, even for
577  /// buffer resources used for private memory access.
579  return getGeneration() < AMDGPUSubtarget::GFX9;
580  }
581 
583  return AutoWaitcntBeforeBarrier;
584  }
585 
586  bool hasCodeObjectV3() const {
587  return CodeObjectV3;
588  }
589 
591  return UnalignedBufferAccess;
592  }
593 
595  return UnalignedScratchAccess;
596  }
597 
598  bool hasApertureRegs() const {
599  return HasApertureRegs;
600  }
601 
602  bool isTrapHandlerEnabled() const {
603  return TrapHandler;
604  }
605 
606  bool isXNACKEnabled() const {
607  return EnableXNACK;
608  }
609 
610  bool hasFlatAddressSpace() const {
611  return FlatAddressSpace;
612  }
613 
614  bool hasFlatInstOffsets() const {
615  return FlatInstOffsets;
616  }
617 
618  bool hasFlatGlobalInsts() const {
619  return FlatGlobalInsts;
620  }
621 
622  bool hasFlatScratchInsts() const {
623  return FlatScratchInsts;
624  }
625 
627  return getGeneration() > GFX9;
628  }
629 
630  bool hasD16LoadStore() const {
631  return getGeneration() >= GFX9;
632  }
633 
634  /// Return true if most LDS instructions have an m0 use that requires m0 to be
635  /// initialized.
636  bool ldsRequiresM0Init() const {
637  return getGeneration() < GFX9;
638  }
639 
640  bool hasAddNoCarry() const {
641  return AddNoCarryInsts;
642  }
643 
644  bool hasUnpackedD16VMem() const {
645  return HasUnpackedD16VMem;
646  }
647 
648  // Covers VS/PS/CS graphics shaders
649  bool isMesaGfxShader(const Function &F) const {
650  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
651  }
652 
653  bool hasMad64_32() const {
654  return getGeneration() >= SEA_ISLANDS;
655  }
656 
657  bool hasSDWAOmod() const {
658  return HasSDWAOmod;
659  }
660 
661  bool hasSDWAScalar() const {
662  return HasSDWAScalar;
663  }
664 
665  bool hasSDWASdst() const {
666  return HasSDWASdst;
667  }
668 
669  bool hasSDWAMac() const {
670  return HasSDWAMac;
671  }
672 
673  bool hasSDWAOutModsVOPC() const {
674  return HasSDWAOutModsVOPC;
675  }
676 
678  return getGeneration() < SEA_ISLANDS;
679  }
680 
681  bool hasDLInsts() const {
682  return HasDLInsts;
683  }
684 
685  bool d16PreservesUnusedBits() const {
686  return D16PreservesUnusedBits;
687  }
688 
689  // Scratch is allocated in 256 dword per wave blocks for the entire
690  // wavefront. When viewed from the perspective of an arbitrary workitem, this
691  // is 4-byte aligned.
692  //
693  // Only 4-byte alignment is really needed to access anything. Transformations
694  // on the pointer value itself may rely on the alignment / known low bits of
695  // the pointer. Set this to something above the minimum to avoid needing
696  // dynamic realignment in common cases.
697  unsigned getStackAlignment() const {
698  return 16;
699  }
700 
701  bool enableMachineScheduler() const override {
702  return true;
703  }
704 
705  bool enableSubRegLiveness() const override {
706  return true;
707  }
708 
709  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
711 
712  /// \returns Number of execution units per compute unit supported by the
713  /// subtarget.
714  unsigned getEUsPerCU() const {
716  }
717 
718  /// \returns Maximum number of waves per compute unit supported by the
719  /// subtarget without any kind of limitation.
720  unsigned getMaxWavesPerCU() const {
722  }
723 
724  /// \returns Maximum number of waves per compute unit supported by the
725  /// subtarget and limited by given \p FlatWorkGroupSize.
726  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
728  FlatWorkGroupSize);
729  }
730 
731  /// \returns Maximum number of waves per execution unit supported by the
732  /// subtarget without any kind of limitation.
733  unsigned getMaxWavesPerEU() const {
735  }
736 
737  /// \returns Number of waves per work group supported by the subtarget and
738  /// limited by given \p FlatWorkGroupSize.
739  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
741  MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
742  }
743 
744  // static wrappers
745  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
746 
747  // XXX - Why is this here if it isn't in the default pass set?
748  bool enableEarlyIfConversion() const override {
749  return true;
750  }
751 
752  void overrideSchedPolicy(MachineSchedPolicy &Policy,
753  unsigned NumRegionInstrs) const override;
754 
755  bool isVGPRSpillingEnabled(const Function &F) const;
756 
757  unsigned getMaxNumUserSGPRs() const {
758  return 16;
759  }
760 
761  bool hasSMemRealTime() const {
762  return HasSMemRealTime;
763  }
764 
765  bool hasMovrel() const {
766  return HasMovrel;
767  }
768 
769  bool hasVGPRIndexMode() const {
770  return HasVGPRIndexMode;
771  }
772 
773  bool useVGPRIndexMode(bool UserEnable) const {
774  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
775  }
776 
777  bool hasScalarCompareEq64() const {
778  return getGeneration() >= VOLCANIC_ISLANDS;
779  }
780 
781  bool hasScalarStores() const {
782  return HasScalarStores;
783  }
784 
785  bool hasScalarAtomics() const {
786  return HasScalarAtomics;
787  }
788 
789 
790  bool hasDPP() const {
791  return HasDPP;
792  }
793 
794  bool enableSIScheduler() const {
795  return EnableSIScheduler;
796  }
797 
798  bool debuggerSupported() const {
799  return debuggerInsertNops() && debuggerEmitPrologue();
800  }
801 
802  bool debuggerInsertNops() const {
803  return DebuggerInsertNops;
804  }
805 
806  bool debuggerEmitPrologue() const {
807  return DebuggerEmitPrologue;
808  }
809 
810  bool loadStoreOptEnabled() const {
811  return EnableLoadStoreOpt;
812  }
813 
814  bool hasSGPRInitBug() const {
815  return SGPRInitBug;
816  }
817 
818  bool has12DWordStoreHazard() const {
819  return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
820  }
821 
822  bool hasSMovFedHazard() const {
823  return getGeneration() >= AMDGPUSubtarget::GFX9;
824  }
825 
827  return getGeneration() >= AMDGPUSubtarget::GFX9;
828  }
829 
830  bool hasReadM0SendMsgHazard() const {
831  return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
832  }
833 
834  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
835  /// SGPRs
836  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
837 
838  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
839  /// VGPRs
840  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
841 
842  /// \returns true if the flat_scratch register should be initialized with the
843  /// pointer to the wave's scratch memory rather than a size and offset.
844  bool flatScratchIsPointer() const {
845  return getGeneration() >= AMDGPUSubtarget::GFX9;
846  }
847 
848  /// \returns true if the machine has merged shaders in which s0-s7 are
849  /// reserved by the hardware and user SGPRs start at s8
850  bool hasMergedShaders() const {
851  return getGeneration() >= GFX9;
852  }
853 
854  /// \returns SGPR allocation granularity supported by the subtarget.
855  unsigned getSGPRAllocGranule() const {
858  }
859 
860  /// \returns SGPR encoding granularity supported by the subtarget.
861  unsigned getSGPREncodingGranule() const {
864  }
865 
866  /// \returns Total number of SGPRs supported by the subtarget.
867  unsigned getTotalNumSGPRs() const {
869  }
870 
871  /// \returns Addressable number of SGPRs supported by the subtarget.
872  unsigned getAddressableNumSGPRs() const {
875  }
876 
877  /// \returns Minimum number of SGPRs that meets the given number of waves per
878  /// execution unit requirement supported by the subtarget.
879  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
881  WavesPerEU);
882  }
883 
884  /// \returns Maximum number of SGPRs that meets the given number of waves per
885  /// execution unit requirement supported by the subtarget.
886  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
888  WavesPerEU, Addressable);
889  }
890 
891  /// \returns Reserved number of SGPRs for given function \p MF.
892  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
893 
894  /// \returns Maximum number of SGPRs that meets number of waves per execution
895  /// unit requirement for function \p MF, or number of SGPRs explicitly
896  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
897  ///
898  /// \returns Value that meets number of waves per execution unit requirement
899  /// if explicitly requested value cannot be converted to integer, violates
900  /// subtarget's specifications, or does not meet number of waves per execution
901  /// unit requirement.
902  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
903 
904  /// \returns VGPR allocation granularity supported by the subtarget.
905  unsigned getVGPRAllocGranule() const {
908  }
909 
910  /// \returns VGPR encoding granularity supported by the subtarget.
911  unsigned getVGPREncodingGranule() const {
914  }
915 
916  /// \returns Total number of VGPRs supported by the subtarget.
917  unsigned getTotalNumVGPRs() const {
919  }
920 
921  /// \returns Addressable number of VGPRs supported by the subtarget.
922  unsigned getAddressableNumVGPRs() const {
925  }
926 
927  /// \returns Minimum number of VGPRs that meets given number of waves per
928  /// execution unit requirement supported by the subtarget.
929  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
931  WavesPerEU);
932  }
933 
934  /// \returns Maximum number of VGPRs that meets given number of waves per
935  /// execution unit requirement supported by the subtarget.
936  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
938  WavesPerEU);
939  }
940 
941  /// \returns Maximum number of VGPRs that meets number of waves per execution
942  /// unit requirement for function \p MF, or number of VGPRs explicitly
943  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
944  ///
945  /// \returns Value that meets number of waves per execution unit requirement
946  /// if explicitly requested value cannot be converted to integer, violates
947  /// subtarget's specifications, or does not meet number of waves per execution
948  /// unit requirement.
949  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
950 
951  void getPostRAMutations(
952  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
953  const override;
954 };
955 
956 class R600Subtarget final : public R600GenSubtargetInfo,
957  public AMDGPUSubtarget {
958 private:
959  R600InstrInfo InstrInfo;
960  R600FrameLowering FrameLowering;
961  bool FMA;
962  bool CaymanISA;
963  bool CFALUBug;
964  bool DX10Clamp;
965  bool HasVertexCache;
966  bool R600ALUInst;
967  bool FP64;
968  short TexVTXClauseSize;
969  Generation Gen;
970  R600TargetLowering TLInfo;
971  InstrItineraryData InstrItins;
972  SelectionDAGTargetInfo TSInfo;
973  AMDGPUAS AS;
974 
975 public:
976  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
977  const TargetMachine &TM);
978 
979  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
980 
981  const R600FrameLowering *getFrameLowering() const override {
982  return &FrameLowering;
983  }
984 
985  const R600TargetLowering *getTargetLowering() const override {
986  return &TLInfo;
987  }
988 
989  const R600RegisterInfo *getRegisterInfo() const override {
990  return &InstrInfo.getRegisterInfo();
991  }
992 
993  const InstrItineraryData *getInstrItineraryData() const override {
994  return &InstrItins;
995  }
996 
997  // Nothing implemented, just prevent crashes on use.
998  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
999  return &TSInfo;
1000  }
1001 
1002  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1003 
1005  return Gen;
1006  }
1007 
1008  unsigned getStackAlignment() const {
1009  return 4;
1010  }
1011 
1012  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1013  StringRef GPU, StringRef FS);
1014 
1015  bool hasBFE() const {
1016  return (getGeneration() >= EVERGREEN);
1017  }
1018 
1019  bool hasBFI() const {
1020  return (getGeneration() >= EVERGREEN);
1021  }
1022 
1023  bool hasBCNT(unsigned Size) const {
1024  if (Size == 32)
1025  return (getGeneration() >= EVERGREEN);
1026 
1027  return false;
1028  }
1029 
1030  bool hasBORROW() const {
1031  return (getGeneration() >= EVERGREEN);
1032  }
1033 
1034  bool hasCARRY() const {
1035  return (getGeneration() >= EVERGREEN);
1036  }
1037 
1038  bool hasCaymanISA() const {
1039  return CaymanISA;
1040  }
1041 
1042  bool hasFFBL() const {
1043  return (getGeneration() >= EVERGREEN);
1044  }
1045 
1046  bool hasFFBH() const {
1047  return (getGeneration() >= EVERGREEN);
1048  }
1049 
1050  bool hasFMA() const { return FMA; }
1051 
1052  bool hasCFAluBug() const { return CFALUBug; }
1053 
1054  bool hasVertexCache() const { return HasVertexCache; }
1055 
1056  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1057 
1058  AMDGPUAS getAMDGPUAS() const { return AS; }
1059 
1060  bool enableMachineScheduler() const override {
1061  return true;
1062  }
1063 
1064  bool enableSubRegLiveness() const override {
1065  return true;
1066  }
1067 };
1068 
1069 } // end namespace llvm
1070 
1071 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool enableIEEEBit(const MachineFunction &MF) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasSDWAOmod() const
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
unsigned getAddressableNumVGPRs(const FeatureBitset &Features)
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const
bool hasApertureRegs() const
bool debuggerSupported() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool isMesaKernel(const Function &F) const
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:294
bool isPromoteAllocaEnabled() const
bool d16PreservesUnusedBits() const
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
unsigned getSGPRAllocGranule() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
bool isAmdCodeObjectV2(const Function &F) const
F(f)
unsigned getTotalNumVGPRs(const FeatureBitset &Features)
unsigned getMinWavesPerEU(const FeatureBitset &Features)
unsigned getMaxWavesPerEU() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasFastFMAF32() const
Generation getGeneration() const
unsigned getWavesPerWorkGroup(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:152
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
bool hasFP64Denormals() const
unsigned getVGPREncodingGranule(const FeatureBitset &Features)
unsigned getEUsPerCU(const FeatureBitset &Features)
Holds all the information related to register banks.
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
unsigned getVGPRAllocGranule(const FeatureBitset &Features)
bool hasIntClamp() const
OpenCL uses address spaces to differentiate between various memory regions on the hardware...
Definition: AMDGPU.h:224
unsigned getMinFlatWorkGroupSize() const
const FeatureBitset & getFeatureBits() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool enableDX10Clamp() const
bool debuggerInsertNops() const
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features)
bool isTrapHandlerEnabled() const
unsigned getMaxWavesPerCU(const FeatureBitset &Features)
bool hasSMemRealTime() const
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, bool Addressable)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
bool hasHalfRate64Ops() const
bool useFlatForGlobal() const
unsigned getAddressableNumSGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
Itinerary data supplied by a subtarget to be used by a target.
bool hasAddNoCarry() const
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features)
const CallLowering * getCallLowering() const override
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const
bool dumpCode() const
bool debuggerEmitPrologue() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
bool isCompute(CallingConv::ID cc)
Container class for subtarget features.
unsigned getMaxWavesPerCU() const
bool hasCFAluBug() const
unsigned getStackAlignment() const
bool hasFminFmaxLegacy() const
unsigned getTotalNumSGPRs(const FeatureBitset &Features)
bool hasDLInsts() const
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasMovrel() const
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
unsigned MaxPrivateElementSize
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits)
const R600InstrInfo * getInstrInfo() const override
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:72
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
const AMDGPUAS & AS
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasScalarAtomics() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
bool enableHugePrivateBuffer() const
bool enableSIScheduler() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
Definition: Function.h:199
bool hasMadMixInsts() const
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
AMDGPUAS getAMDGPUAS() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
unsigned getAlignmentForImplicitArgPtr() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
bool hasMed3_16() const
int getLDSBankCount() const
bool hasBCNT(unsigned Size) const
const InstructionSelector * getInstructionSelector() const override
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool enableSubRegLiveness() const override
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
unsigned getMinWavesPerEU() const
R600 DAG Lowering interface definition.
unsigned getMaxFlatWorkGroupSize() const
unsigned getTotalNumVGPRs() const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
unsigned getAddressableNumSGPRs(const FeatureBitset &Features)
unsigned getStackAlignment() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule(const FeatureBitset &Features)
bool hasSDWAScalar() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return whether most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:278
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
const FeatureBitset & SubtargetFeatureBits
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
unsigned getSGPRAllocGranule(const FeatureBitset &Features)
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
AMDGPUAS getAMDGPUAS() const
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override