LLVM  9.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "AMDGPU.h"
18 #include "AMDGPUCallLowering.h"
19 #include "R600FrameLowering.h"
20 #include "R600ISelLowering.h"
21 #include "R600InstrInfo.h"
22 #include "SIFrameLowering.h"
23 #include "SIISelLowering.h"
24 #include "SIInstrInfo.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/ADT/Triple.h"
34 #include <cassert>
35 #include <cstdint>
36 #include <memory>
37 #include <utility>
38 
39 #define GET_SUBTARGETINFO_HEADER
40 #include "AMDGPUGenSubtargetInfo.inc"
41 #define GET_SUBTARGETINFO_HEADER
42 #include "R600GenSubtargetInfo.inc"
43 
44 namespace llvm {
45 
46 class StringRef;
47 
49 public:
50  enum Generation {
51  R600 = 0,
52  R700 = 1,
53  EVERGREEN = 2,
58  GFX9 = 7,
59  GFX10 = 8
60  };
61 
62 private:
63  Triple TargetTriple;
64 
65 protected:
70  bool HasSDWA;
72  bool HasMulI24;
73  bool HasMulU24;
79  unsigned WavefrontSize;
80 
81 public:
82  AMDGPUSubtarget(const Triple &TT);
83 
84  static const AMDGPUSubtarget &get(const MachineFunction &MF);
85  static const AMDGPUSubtarget &get(const TargetMachine &TM,
86  const Function &F);
87 
88  /// \returns Default range of flat work group sizes for a calling convention.
89  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
90 
91  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
92  /// for function \p F, or minimum/maximum flat work group sizes explicitly
93  /// requested using "amdgpu-flat-work-group-size" attribute attached to
94  /// function \p F.
95  ///
96  /// \returns Subtarget's default values if explicitly requested values cannot
97  /// be converted to integer, or violate subtarget's specifications.
98  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
99 
100  /// \returns Subtarget's default pair of minimum/maximum number of waves per
101  /// execution unit for function \p F, or minimum/maximum number of waves per
102  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
103  /// attached to function \p F.
104  ///
105  /// \returns Subtarget's default values if explicitly requested values cannot
106  /// be converted to integer, violate subtarget's specifications, or are not
107  /// compatible with minimum/maximum number of waves limited by flat work group
108  /// size, register usage, and/or lds usage.
109  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
110 
111  /// Return the amount of LDS that can be used that will not restrict the
112  /// occupancy lower than WaveCount.
113  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
114  const Function &) const;
115 
116  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
117  /// the given LDS memory size is the only constraint.
118  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
119 
120  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
121 
122  bool isAmdHsaOS() const {
123  return TargetTriple.getOS() == Triple::AMDHSA;
124  }
125 
126  bool isAmdPalOS() const {
127  return TargetTriple.getOS() == Triple::AMDPAL;
128  }
129 
130  bool isMesa3DOS() const {
131  return TargetTriple.getOS() == Triple::Mesa3D;
132  }
133 
134  bool isMesaKernel(const Function &F) const {
135  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
136  }
137 
138  bool isAmdHsaOrMesa(const Function &F) const {
139  return isAmdHsaOS() || isMesaKernel(F);
140  }
141 
142  bool has16BitInsts() const {
143  return Has16BitInsts;
144  }
145 
146  bool hasMadMixInsts() const {
147  return HasMadMixInsts;
148  }
149 
150  bool hasFP32Denormals() const {
151  return FP32Denormals;
152  }
153 
154  bool hasFPExceptions() const {
155  return FPExceptions;
156  }
157 
158  bool hasSDWA() const {
159  return HasSDWA;
160  }
161 
162  bool hasVOP3PInsts() const {
163  return HasVOP3PInsts;
164  }
165 
166  bool hasMulI24() const {
167  return HasMulI24;
168  }
169 
170  bool hasMulU24() const {
171  return HasMulU24;
172  }
173 
174  bool hasInv2PiInlineImm() const {
175  return HasInv2PiInlineImm;
176  }
177 
178  bool hasFminFmaxLegacy() const {
179  return HasFminFmaxLegacy;
180  }
181 
182  bool hasTrigReducedRange() const {
183  return HasTrigReducedRange;
184  }
185 
186  bool isPromoteAllocaEnabled() const {
187  return EnablePromoteAlloca;
188  }
189 
190  unsigned getWavefrontSize() const {
191  return WavefrontSize;
192  }
193 
194  int getLocalMemorySize() const {
195  return LocalMemorySize;
196  }
197 
198  unsigned getAlignmentForImplicitArgPtr() const {
199  return isAmdHsaOS() ? 8 : 4;
200  }
201 
202  /// Returns the offset in bytes from the start of the input buffer
203  /// of the first explicit kernel argument.
204  unsigned getExplicitKernelArgOffset(const Function &F) const {
205  return isAmdHsaOrMesa(F) ? 0 : 36;
206  }
207 
208  /// \returns Maximum number of work groups per compute unit supported by the
209  /// subtarget and limited by given \p FlatWorkGroupSize.
210  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
211 
212  /// \returns Minimum flat work group size supported by the subtarget.
213  virtual unsigned getMinFlatWorkGroupSize() const = 0;
214 
215  /// \returns Maximum flat work group size supported by the subtarget.
216  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
217 
218  /// \returns Maximum number of waves per execution unit supported by the
219  /// subtarget and limited by given \p FlatWorkGroupSize.
220  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
221 
222  /// \returns Minimum number of waves per execution unit supported by the
223  /// subtarget.
224  virtual unsigned getMinWavesPerEU() const = 0;
225 
226  unsigned getMaxWavesPerEU() const { return 10; }
227 
228  /// Creates value range metadata on a workitemid.* intrinsic call or load.
229  bool makeLIDRangeMetadata(Instruction *I) const;
230 
231  /// \returns Number of bytes of arguments that are passed to a shader or
232  /// kernel in addition to the explicit ones declared for the function.
233  unsigned getImplicitArgNumBytes(const Function &F) const {
234  // Mesa kernels use a fixed 16 bytes; otherwise the function attribute decides.
235  return isMesaKernel(F) ? 16 : AMDGPU::getIntegerAttribute(
236  F, "amdgpu-implicitarg-num-bytes", 0);
237  }
238  uint64_t getExplicitKernArgSize(const Function &F,
239  unsigned &MaxAlign) const;
240  unsigned getKernArgSegmentSize(const Function &F,
241  unsigned &MaxAlign) const;
242 
243  virtual ~AMDGPUSubtarget() = default; // Virtual: this is a polymorphic base.
244 };
245 
247  public AMDGPUSubtarget {
248 public:
250  TrapHandlerAbiNone = 0,
251  TrapHandlerAbiHsa = 1
252  };
253 
254  enum TrapID {
255  TrapIDHardwareReserved = 0,
256  TrapIDHSADebugTrap = 1,
257  TrapIDLLVMTrap = 2,
258  TrapIDLLVMDebugTrap = 3,
259  TrapIDDebugBreakpoint = 7,
260  TrapIDDebugReserved8 = 8,
261  TrapIDDebugReservedFE = 0xfe,
262  TrapIDDebugReservedFF = 0xff
263  };
264 
266  LLVMTrapHandlerRegValue = 1
267  };
268 
269 private:
270  /// GlobalISel related APIs.
271  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
272  std::unique_ptr<InstructionSelector> InstSelector;
273  std::unique_ptr<LegalizerInfo> Legalizer;
274  std::unique_ptr<RegisterBankInfo> RegBankInfo;
275 
276 protected:
277  // Basic subtarget description.
279  unsigned Gen;
283 
284  // Possibly statically set by tablegen, but may want to be overridden.
287 
288  // Dynamically set bits that enable features.
300 
301  // Used as options.
308  bool DumpCode;
309 
310  // Subtarget properties statically set by tablegen
311  bool FP64;
312  bool FMA;
313  bool MIMG_R128;
314  bool IsGCN;
316  bool CIInsts;
317  bool GFX8Insts;
318  bool GFX9Insts;
325  bool HasMovrel;
334  bool HasDPP;
343  bool HasVscnt;
355  bool CaymanISA;
356  bool CFALUBug;
361 
370 
371  // Dummy feature to use for assembler in tablegen.
373 
375 private:
376  SIInstrInfo InstrInfo;
377  SITargetLowering TLInfo;
378  SIFrameLowering FrameLowering;
379 
380 public:
381  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
382  const GCNTargetMachine &TM);
383  ~GCNSubtarget() override;
384 
385  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
386  StringRef GPU, StringRef FS);
387 
388  const SIInstrInfo *getInstrInfo() const override {
389  return &InstrInfo;
390  }
391 
392  const SIFrameLowering *getFrameLowering() const override {
393  return &FrameLowering;
394  }
395 
396  const SITargetLowering *getTargetLowering() const override {
397  return &TLInfo;
398  }
399 
400  const SIRegisterInfo *getRegisterInfo() const override {
401  return &InstrInfo.getRegisterInfo();
402  }
403 
404  const CallLowering *getCallLowering() const override {
405  return CallLoweringInfo.get();
406  }
407 
408  const InstructionSelector *getInstructionSelector() const override {
409  return InstSelector.get();
410  }
411 
412  const LegalizerInfo *getLegalizerInfo() const override {
413  return Legalizer.get();
414  }
415 
416  const RegisterBankInfo *getRegBankInfo() const override {
417  return RegBankInfo.get();
418  }
419 
420  // Nothing implemented, just prevent crashes on use.
421  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
422  return &TSInfo;
423  }
424 
425  const InstrItineraryData *getInstrItineraryData() const override {
426  return &InstrItins;
427  }
428 
429  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
430 
432  return (Generation)Gen;
433  }
434 
435  unsigned getWavefrontSizeLog2() const {
436  return Log2_32(WavefrontSize);
437  }
438 
439  int getLDSBankCount() const {
440  return LDSBankCount;
441  }
442 
443  unsigned getMaxPrivateElementSize() const {
444  return MaxPrivateElementSize;
445  }
446 
447  unsigned getConstantBusLimit(unsigned Opcode) const;
448 
449  bool hasIntClamp() const {
450  return HasIntClamp;
451  }
452 
453  bool hasFP64() const {
454  return FP64;
455  }
456 
457  bool hasMIMG_R128() const {
458  return MIMG_R128;
459  }
460 
461  bool hasHWFP64() const {
462  return FP64;
463  }
464 
465  bool hasFastFMAF32() const {
466  return FastFMAF32;
467  }
468 
469  bool hasHalfRate64Ops() const {
470  return HalfRate64Ops;
471  }
472 
473  bool hasAddr64() const {
474  return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
475  }
476 
477  bool hasBFE() const {
478  return true;
479  }
480 
481  bool hasBFI() const {
482  return true;
483  }
484 
485  bool hasBFM() const {
486  return hasBFE();
487  }
488 
489  bool hasBCNT(unsigned Size) const {
490  return true;
491  }
492 
493  bool hasFFBL() const {
494  return true;
495  }
496 
497  bool hasFFBH() const {
498  return true;
499  }
500 
501  bool hasMed3_16() const {
502  return getGeneration() >= AMDGPUSubtarget::GFX9;
503  }
504 
505  bool hasMin3Max3_16() const {
506  return getGeneration() >= AMDGPUSubtarget::GFX9;
507  }
508 
509  bool hasFmaMixInsts() const {
510  return HasFmaMixInsts;
511  }
512 
513  bool hasCARRY() const {
514  return true;
515  }
516 
517  bool hasFMA() const {
518  return FMA;
519  }
520 
521  bool hasSwap() const {
522  return GFX9Insts;
523  }
524 
526  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
527  }
528 
529  bool enableHugePrivateBuffer() const {
530  return EnableHugePrivateBuffer;
531  }
532 
534  return EnableUnsafeDSOffsetFolding;
535  }
536 
537  bool dumpCode() const {
538  return DumpCode;
539  }
540 
541  /// Return the amount of LDS that can be used that will not restrict the
542  /// occupancy lower than WaveCount.
543  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
544  const Function &) const;
545 
546  bool hasFP16Denormals() const {
547  return FP64FP16Denormals;
548  }
549 
550  bool hasFP64Denormals() const {
551  return FP64FP16Denormals;
552  }
553 
555  return getGeneration() >= AMDGPUSubtarget::GFX9;
556  }
557 
558  bool useFlatForGlobal() const {
559  return FlatForGlobal;
560  }
561 
562  /// \returns If target supports ds_read/write_b128 and user enables generation
563  /// of ds_read/write_b128.
564  bool useDS128() const {
565  return CIInsts && EnableDS128;
566  }
567 
568  /// \returns If MUBUF instructions always perform range checking, even for
569  /// buffer resources used for private memory access.
571  return getGeneration() < AMDGPUSubtarget::GFX9;
572  }
573 
574  /// \returns If target requires PRT Strict NULL support (zero result registers
575  /// for sparse texture support).
576  bool usePRTStrictNull() const {
577  return EnablePRTStrictNull;
578  }
579 
581  return AutoWaitcntBeforeBarrier;
582  }
583 
584  bool hasCodeObjectV3() const {
585  // FIXME: Add code object v3 support for mesa and pal; only AMDHSA honors it.
586  return isAmdHsaOS() && CodeObjectV3;
587  }
588 
590  return UnalignedBufferAccess;
591  }
592 
594  return UnalignedScratchAccess;
595  }
596 
597  bool hasApertureRegs() const {
598  return HasApertureRegs;
599  }
600 
601  bool isTrapHandlerEnabled() const {
602  return TrapHandler;
603  }
604 
605  bool isXNACKEnabled() const {
606  return EnableXNACK;
607  }
608 
609  bool isCuModeEnabled() const {
610  return EnableCuMode;
611  }
612 
613  bool hasFlatAddressSpace() const {
614  return FlatAddressSpace;
615  }
616 
617  bool hasFlatInstOffsets() const {
618  return FlatInstOffsets;
619  }
620 
621  bool hasFlatGlobalInsts() const {
622  return FlatGlobalInsts;
623  }
624 
625  bool hasFlatScratchInsts() const {
626  return FlatScratchInsts;
627  }
628 
630  return ScalarFlatScratchInsts;
631  }
632 
633  bool hasFlatSegmentOffsetBug() const {
634  return HasFlatSegmentOffsetBug;
635  }
636 
638  return getGeneration() > GFX9;
639  }
640 
641  bool hasD16LoadStore() const {
642  return getGeneration() >= GFX9;
643  }
644 
645  bool d16PreservesUnusedBits() const {
646  return hasD16LoadStore() && !isSRAMECCEnabled();
647  }
648 
649  /// Return if most LDS instructions have an m0 use that requires m0 to be
650  /// initialized.
651  bool ldsRequiresM0Init() const {
652  return getGeneration() < GFX9;
653  }
654 
655  bool hasAddNoCarry() const {
656  return AddNoCarryInsts;
657  }
658 
659  bool hasUnpackedD16VMem() const {
660  return HasUnpackedD16VMem;
661  }
662 
663  // Covers VS/PS/CS graphics shaders
664  bool isMesaGfxShader(const Function &F) const {
665  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
666  }
667 
668  bool hasMad64_32() const {
669  return getGeneration() >= SEA_ISLANDS;
670  }
671 
672  bool hasSDWAOmod() const {
673  return HasSDWAOmod;
674  }
675 
676  bool hasSDWAScalar() const {
677  return HasSDWAScalar;
678  }
679 
680  bool hasSDWASdst() const {
681  return HasSDWASdst;
682  }
683 
684  bool hasSDWAMac() const {
685  return HasSDWAMac;
686  }
687 
688  bool hasSDWAOutModsVOPC() const {
689  return HasSDWAOutModsVOPC;
690  }
691 
692  bool hasDLInsts() const {
693  return HasDLInsts;
694  }
695 
696  bool hasDot1Insts() const {
697  return HasDot1Insts;
698  }
699 
700  bool hasDot2Insts() const {
701  return HasDot2Insts;
702  }
703 
704  bool isSRAMECCEnabled() const {
705  return EnableSRAMECC;
706  }
707 
708  bool hasNoSdstCMPX() const {
709  return HasNoSdstCMPX;
710  }
711 
712  bool hasVscnt() const {
713  return HasVscnt;
714  }
715 
716  bool hasRegisterBanking() const {
717  return HasRegisterBanking;
718  }
719 
720  bool hasVOP3Literal() const {
721  return HasVOP3Literal;
722  }
723 
724  bool hasNoDataDepHazard() const {
725  return HasNoDataDepHazard;
726  }
727 
729  return getGeneration() < SEA_ISLANDS;
730  }
731 
732  // Scratch is allocated in 256 dword per wave blocks for the entire
733  // wavefront. When viewed from the perspective of an arbitrary workitem, this
734  // is 4-byte aligned.
735  //
736  // Only 4-byte alignment is really needed to access anything. Transformations
737  // on the pointer value itself may rely on the alignment / known low bits of
738  // the pointer. Set this to something above the minimum to avoid needing
739  // dynamic realignment in common cases.
740  unsigned getStackAlignment() const {
741  return 16;
742  }
743 
744  bool enableMachineScheduler() const override {
745  return true;
746  }
747 
748  bool enableSubRegLiveness() const override {
749  return true;
750  }
751 
752  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
754 
755  /// \returns Number of execution units per compute unit supported by the
756  /// subtarget.
757  unsigned getEUsPerCU() const {
758  return AMDGPU::IsaInfo::getEUsPerCU(this);
759  }
760 
761  /// \returns Maximum number of waves per compute unit supported by the
762  /// subtarget without any kind of limitation.
763  unsigned getMaxWavesPerCU() const {
765  }
766 
767  /// \returns Maximum number of waves per compute unit supported by the
768  /// subtarget and limited by given \p FlatWorkGroupSize.
769  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
770  return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
771  }
772 
773  /// \returns Maximum number of waves per execution unit supported by the
774  /// subtarget without any kind of limitation.
775  unsigned getMaxWavesPerEU() const {
777  }
778 
779  /// \returns Number of waves per work group supported by the subtarget and
780  /// limited by given \p FlatWorkGroupSize.
781  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
782  return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
783  }
784 
785  // static wrappers
786  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
787 
788  // XXX - Why is this here if it isn't in the default pass set?
789  bool enableEarlyIfConversion() const override {
790  return true;
791  }
792 
793  void overrideSchedPolicy(MachineSchedPolicy &Policy,
794  unsigned NumRegionInstrs) const override;
795 
796  unsigned getMaxNumUserSGPRs() const {
797  return 16;
798  }
799 
800  bool hasSMemRealTime() const {
801  return HasSMemRealTime;
802  }
803 
804  bool hasMovrel() const {
805  return HasMovrel;
806  }
807 
808  bool hasVGPRIndexMode() const {
809  return HasVGPRIndexMode;
810  }
811 
812  bool useVGPRIndexMode(bool UserEnable) const {
813  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
814  }
815 
816  bool hasScalarCompareEq64() const {
817  return getGeneration() >= VOLCANIC_ISLANDS;
818  }
819 
820  bool hasScalarStores() const {
821  return HasScalarStores;
822  }
823 
824  bool hasScalarAtomics() const {
825  return HasScalarAtomics;
826  }
827 
828  bool hasLDSFPAtomics() const {
829  return GFX8Insts;
830  }
831 
832  bool hasDPP() const {
833  return HasDPP;
834  }
835 
836  bool hasR128A16() const {
837  return HasR128A16;
838  }
839 
840  bool hasNSAEncoding() const {
841  return HasNSAEncoding;
842  }
843 
844  bool hasMadF16() const;
845 
846  bool enableSIScheduler() const {
847  return EnableSIScheduler;
848  }
849 
850  bool loadStoreOptEnabled() const {
851  return EnableLoadStoreOpt;
852  }
853 
854  bool hasSGPRInitBug() const {
855  return SGPRInitBug;
856  }
857 
858  bool has12DWordStoreHazard() const {
859  return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
860  }
861 
862  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
863  bool hasDwordx3LoadStores() const {
864  return CIInsts;
865  }
866 
867  bool hasSMovFedHazard() const {
868  return getGeneration() == AMDGPUSubtarget::GFX9;
869  }
870 
872  return getGeneration() == AMDGPUSubtarget::GFX9;
873  }
874 
875  bool hasReadM0SendMsgHazard() const {
876  // Hazard applies to the closed generation range [VOLCANIC_ISLANDS, GFX9].
877  const auto G = getGeneration();
878  return G >= AMDGPUSubtarget::VOLCANIC_ISLANDS && G <= AMDGPUSubtarget::GFX9;
879  }
879 
880  bool hasVcmpxPermlaneHazard() const {
881  return HasVcmpxPermlaneHazard;
882  }
883 
885  return HasVMEMtoScalarWriteHazard;
886  }
887 
889  return HasSMEMtoVectorWriteHazard;
890  }
891 
892  bool hasLDSMisalignedBug() const {
893  return LDSMisalignedBug && !EnableCuMode;
894  }
895 
896  bool hasInstFwdPrefetchBug() const {
897  return HasInstFwdPrefetchBug;
898  }
899 
900  bool hasVcmpxExecWARHazard() const {
901  return HasVcmpxExecWARHazard;
902  }
903 
905  return HasLdsBranchVmemWARHazard;
906  }
907 
908  bool hasNSAtoVMEMBug() const {
909  return HasNSAtoVMEMBug;
910  }
911 
912  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
913  /// SGPRs
914  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
915 
916  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
917  /// VGPRs
918  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
919 
920  /// \returns true if the flat_scratch register should be initialized with the
921  /// pointer to the wave's scratch memory rather than a size and offset.
922  bool flatScratchIsPointer() const {
923  return getGeneration() >= AMDGPUSubtarget::GFX9;
924  }
925 
926  /// \returns true if the machine has merged shaders in which s0-s7 are
927  /// reserved by the hardware and user SGPRs start at s8
928  bool hasMergedShaders() const {
929  return getGeneration() >= GFX9;
930  }
931 
932  /// \returns SGPR allocation granularity supported by the subtarget.
933  unsigned getSGPRAllocGranule() const {
935  }
936 
937  /// \returns SGPR encoding granularity supported by the subtarget.
938  unsigned getSGPREncodingGranule() const {
940  }
941 
942  /// \returns Total number of SGPRs supported by the subtarget.
943  unsigned getTotalNumSGPRs() const {
945  }
946 
947  /// \returns Addressable number of SGPRs supported by the subtarget.
948  unsigned getAddressableNumSGPRs() const {
950  }
951 
952  /// \returns Minimum number of SGPRs that meets the given number of waves per
953  /// execution unit requirement supported by the subtarget.
954  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
955  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
956  }
957 
958  /// \returns Maximum number of SGPRs that meets the given number of waves per
959  /// execution unit requirement supported by the subtarget.
960  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
961  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
962  }
963 
964  /// \returns Reserved number of SGPRs for given function \p MF.
965  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
966 
967  /// \returns Maximum number of SGPRs that meets number of waves per execution
968  /// unit requirement for function \p MF, or number of SGPRs explicitly
969  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
970  ///
971  /// \returns Value that meets number of waves per execution unit requirement
972  /// if explicitly requested value cannot be converted to integer, violates
973  /// subtarget's specifications, or does not meet number of waves per execution
974  /// unit requirement.
975  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
976 
977  /// \returns VGPR allocation granularity supported by the subtarget.
978  unsigned getVGPRAllocGranule() const {
980  }
981 
982  /// \returns VGPR encoding granularity supported by the subtarget.
983  unsigned getVGPREncodingGranule() const {
985  }
986 
987  /// \returns Total number of VGPRs supported by the subtarget.
988  unsigned getTotalNumVGPRs() const {
990  }
991 
992  /// \returns Addressable number of VGPRs supported by the subtarget.
993  unsigned getAddressableNumVGPRs() const {
995  }
996 
997  /// \returns Minimum number of VGPRs that meets given number of waves per
998  /// execution unit requirement supported by the subtarget.
999  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1000  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1001  }
1002 
1003  /// \returns Maximum number of VGPRs that meets given number of waves per
1004  /// execution unit requirement supported by the subtarget.
1005  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1006  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1007  }
1008 
1009  /// \returns Maximum number of VGPRs that meets number of waves per execution
1010  /// unit requirement for function \p MF, or number of VGPRs explicitly
1011  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1012  ///
1013  /// \returns Value that meets number of waves per execution unit requirement
1014  /// if explicitly requested value cannot be converted to integer, violates
1015  /// subtarget's specifications, or does not meet number of waves per execution
1016  /// unit requirement.
1017  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1018 
1019  void getPostRAMutations(
1020  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1021  const override;
1022 
1023  /// \returns Maximum number of work groups per compute unit supported by the
1024  /// subtarget and limited by given \p FlatWorkGroupSize.
1025  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1026  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1027  }
1028 
1029  /// \returns Minimum flat work group size supported by the subtarget.
1030  unsigned getMinFlatWorkGroupSize() const override {
1032  }
1033 
1034  /// \returns Maximum flat work group size supported by the subtarget.
1035  unsigned getMaxFlatWorkGroupSize() const override {
1037  }
1038 
1039  /// \returns Maximum number of waves per execution unit supported by the
1040  /// subtarget and limited by given \p FlatWorkGroupSize.
1041  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1042  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1043  }
1044 
1045  /// \returns Minimum number of waves per execution unit supported by the
1046  /// subtarget.
1047  unsigned getMinWavesPerEU() const override {
1048  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1049  }
1050 };
1051 
1053  public AMDGPUSubtarget {
1054 private:
1055  R600InstrInfo InstrInfo;
1056  R600FrameLowering FrameLowering;
1057  bool FMA;
1058  bool CaymanISA;
1059  bool CFALUBug;
1060  bool HasVertexCache;
1061  bool R600ALUInst;
1062  bool FP64;
1063  short TexVTXClauseSize;
1064  Generation Gen;
1065  R600TargetLowering TLInfo;
1066  InstrItineraryData InstrItins;
1067  SelectionDAGTargetInfo TSInfo;
1068 
1069 public:
1070  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
1071  const TargetMachine &TM);
1072 
1073  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
1074 
1075  const R600FrameLowering *getFrameLowering() const override {
1076  return &FrameLowering;
1077  }
1078 
1079  const R600TargetLowering *getTargetLowering() const override {
1080  return &TLInfo;
1081  }
1082 
1083  const R600RegisterInfo *getRegisterInfo() const override {
1084  return &InstrInfo.getRegisterInfo();
1085  }
1086 
1087  const InstrItineraryData *getInstrItineraryData() const override {
1088  return &InstrItins;
1089  }
1090 
1091  // Nothing implemented, just prevent crashes on use.
1093  return &TSInfo;
1094  }
1095 
1096  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1097 
1099  return Gen;
1100  }
1101 
1102  unsigned getStackAlignment() const {
1103  return 4;
1104  }
1105 
1106  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1107  StringRef GPU, StringRef FS);
1108 
1109  bool hasBFE() const {
1110  return (getGeneration() >= EVERGREEN);
1111  }
1112 
1113  bool hasBFI() const {
1114  return (getGeneration() >= EVERGREEN);
1115  }
1116 
1117  /// \returns true only when \p Size is 32 and the generation is EVERGREEN or
1118  /// later in the Generation enum; every other size yields false.
1119  bool hasBCNT(unsigned Size) const {
1120  return Size == 32 && getGeneration() >= EVERGREEN;
1121  }
1123 
1124  bool hasBORROW() const {
1125  return (getGeneration() >= EVERGREEN);
1126  }
1127 
1128  bool hasCARRY() const {
1129  return (getGeneration() >= EVERGREEN);
1130  }
1131 
1132  bool hasCaymanISA() const {
1133  return CaymanISA;
1134  }
1135 
1136  bool hasFFBL() const {
1137  return (getGeneration() >= EVERGREEN);
1138  }
1139 
1140  bool hasFFBH() const {
1141  return (getGeneration() >= EVERGREEN);
1142  }
1143 
1144  bool hasFMA() const { return FMA; }
1145 
1146  bool hasCFAluBug() const { return CFALUBug; }
1147 
1148  bool hasVertexCache() const { return HasVertexCache; }
1149 
1150  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1151 
1152  bool enableMachineScheduler() const override {
1153  return true;
1154  }
1155 
1156  bool enableSubRegLiveness() const override {
1157  return true;
1158  }
1159 
1160  /// \returns Maximum number of work groups per compute unit supported by the
1161  /// subtarget and limited by given \p FlatWorkGroupSize.
1162  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1163  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1164  }
1165 
1166  /// \returns Minimum flat work group size supported by the subtarget.
1167  unsigned getMinFlatWorkGroupSize() const override {
1169  }
1170 
1171  /// \returns Maximum flat work group size supported by the subtarget.
1172  unsigned getMaxFlatWorkGroupSize() const override {
1174  }
1175 
1176  /// \returns Maximum number of waves per execution unit supported by the
1177  /// subtarget and limited by given \p FlatWorkGroupSize.
1178  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1179  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1180  }
1181 
1182  /// \returns Minimum number of waves per execution unit supported by the
1183  /// subtarget.
1184  unsigned getMinWavesPerEU() const override {
1185  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1186  }
1187 };
1188 
1189 } // end namespace llvm
1190 
1191 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasVscnt() const
bool hasSDWAOmod() const
bool hasLDSMisalignedBug() const
bool hasVOP3Literal() const
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
bool hasApertureRegs() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool isMesaKernel(const Function &F) const
unsigned getMinFlatWorkGroupSize() const override
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:303
bool isPromoteAllocaEnabled() const
bool d16PreservesUnusedBits() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getSGPRAllocGranule() const
bool hasNSAtoVMEMBug() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasTrigReducedRange() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool hasVcmpxPermlaneHazard() const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
virtual unsigned getMinWavesPerEU() const =0
F(f)
InstrItineraryData InstrItins
unsigned getMaxWavesPerEU() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasInstFwdPrefetchBug() const
bool hasFastFMAF32() const
Generation getGeneration() const
bool hasFlatSegmentOffsetBug() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:165
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasFP64Denormals() const
Holds all the information related to register banks.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
bool hasDwordx3LoadStores() const
bool hasIntClamp() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasLdsBranchVmemWARHazard() const
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
bool isTrapHandlerEnabled() const
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasSMemRealTime() const
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasHalfRate64Ops() const
bool useFlatForGlobal() const
unsigned getAddressableNumSGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinWavesPerEU() const override
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
Itinerary data supplied by a subtarget to be used by a target.
bool hasVMEMtoScalarWriteHazard() const
bool hasAddNoCarry() const
const CallLowering * getCallLowering() const override
virtual unsigned getMinFlatWorkGroupSize() const =0
bool hasNoDataDepHazard() const
bool dumpCode() const
bool isSRAMECCEnabled() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
bool hasDot2Insts() const
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMaxWavesPerCU() const
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
bool hasCFAluBug() const
unsigned getStackAlignment() const
bool hasFminFmaxLegacy() const
bool hasDLInsts() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasNSAEncoding() const
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasSwap() const
bool hasMovrel() const
unsigned MaxPrivateElementSize
bool usePRTStrictNull() const
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
bool isCuModeEnabled() const
bool hasLDSFPAtomics() const
bool hasSMEMtoVectorWriteHazard() const
const R600InstrInfo * getInstrInfo() const override
bool hasRegisterBanking() const
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:71
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasScalarAtomics() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasVcmpxExecWARHazard() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI)
bool enableHugePrivateBuffer() const
bool enableSIScheduler() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:212
bool hasMadMixInsts() const
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
bool hasScalarFlatScratchInsts() const
unsigned getAlignmentForImplicitArgPtr() const
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasMed3_16() const
int getLDSBankCount() const
bool hasBCNT(unsigned Size) const
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI)
const InstructionSelector * getInstructionSelector() const override
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool enableSubRegLiveness() const override
bool hasNoSdstCMPX() const
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
R600 DAG Lowering interface definition.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(const Triple &TT)
unsigned getTotalNumVGPRs() const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:46
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getStackAlignment() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool hasSDWAScalar() const
const InstrItineraryData * getInstrItineraryData() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
bool hasDot1Insts() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:326
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasR128A16() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override