LLVM  9.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "AMDGPU.h"
18 #include "AMDGPUCallLowering.h"
19 #include "R600FrameLowering.h"
20 #include "R600ISelLowering.h"
21 #include "R600InstrInfo.h"
22 #include "SIFrameLowering.h"
23 #include "SIISelLowering.h"
24 #include "SIInstrInfo.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/ADT/Triple.h"
34 #include <cassert>
35 #include <cstdint>
36 #include <memory>
37 #include <utility>
38 
39 #define GET_SUBTARGETINFO_HEADER
40 #include "AMDGPUGenSubtargetInfo.inc"
41 #define GET_SUBTARGETINFO_HEADER
42 #include "R600GenSubtargetInfo.inc"
43 
44 namespace llvm {
45 
46 class StringRef;
47 
49 public:
50  enum Generation {
51  R600 = 0,
52  R700 = 1,
53  EVERGREEN = 2,
58  GFX9 = 7
59  };
60 
61 private:
62  Triple TargetTriple;
63 
64 protected:
69  bool HasSDWA;
71  bool HasMulI24;
72  bool HasMulU24;
78  unsigned WavefrontSize;
79 
80 public:
81  AMDGPUSubtarget(const Triple &TT);
82 
83  static const AMDGPUSubtarget &get(const MachineFunction &MF);
84  static const AMDGPUSubtarget &get(const TargetMachine &TM,
85  const Function &F);
86 
87  /// \returns Default range flat work group size for a calling convention.
88  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
89 
90  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
91  /// for function \p F, or minimum/maximum flat work group sizes explicitly
92  /// requested using "amdgpu-flat-work-group-size" attribute attached to
93  /// function \p F.
94  ///
95  /// \returns Subtarget's default values if explicitly requested values cannot
96  /// be converted to integer, or violate subtarget's specifications.
97  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
98 
99  /// \returns Subtarget's default pair of minimum/maximum number of waves per
100  /// execution unit for function \p F, or minimum/maximum number of waves per
101  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
102  /// attached to function \p F.
103  ///
104  /// \returns Subtarget's default values if explicitly requested values cannot
105  /// be converted to integer, violate subtarget's specifications, or are not
106  /// compatible with minimum/maximum number of waves limited by flat work group
107  /// size, register usage, and/or lds usage.
108  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
109 
110  /// Return the amount of LDS that can be used that will not restrict the
111  /// occupancy lower than WaveCount.
112  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
113  const Function &) const;
114 
115  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount
116  /// if the given LDS memory size is the only constraint.
117  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
118 
119  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
120 
121  /// \returns true when the target triple's OS is AMDHSA.
122  bool isAmdHsaOS() const { return TargetTriple.getOS() == Triple::AMDHSA; }
124 
125  /// \returns true when the target triple's OS is AMDPAL.
126  bool isAmdPalOS() const { return TargetTriple.getOS() == Triple::AMDPAL; }
128 
129  /// \returns true when the target triple's OS is Mesa3D.
130  bool isMesa3DOS() const { return TargetTriple.getOS() == Triple::Mesa3D; }
132 
133  bool isMesaKernel(const Function &F) const {
134  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
135  }
136 
137  bool isAmdHsaOrMesa(const Function &F) const {
138  return isAmdHsaOS() || isMesaKernel(F);
139  }
140 
141  bool has16BitInsts() const {
142  return Has16BitInsts;
143  }
144 
145  bool hasMadMixInsts() const {
146  return HasMadMixInsts;
147  }
148 
149  bool hasFP32Denormals() const {
150  return FP32Denormals;
151  }
152 
153  bool hasFPExceptions() const {
154  return FPExceptions;
155  }
156 
157  bool hasSDWA() const {
158  return HasSDWA;
159  }
160 
161  bool hasVOP3PInsts() const {
162  return HasVOP3PInsts;
163  }
164 
165  bool hasMulI24() const {
166  return HasMulI24;
167  }
168 
169  bool hasMulU24() const {
170  return HasMulU24;
171  }
172 
173  bool hasInv2PiInlineImm() const {
174  return HasInv2PiInlineImm;
175  }
176 
177  bool hasFminFmaxLegacy() const {
178  return HasFminFmaxLegacy;
179  }
180 
181  bool hasTrigReducedRange() const {
182  return HasTrigReducedRange;
183  }
184 
185  bool isPromoteAllocaEnabled() const {
186  return EnablePromoteAlloca;
187  }
188 
189  unsigned getWavefrontSize() const {
190  return WavefrontSize;
191  }
192 
193  int getLocalMemorySize() const {
194  return LocalMemorySize;
195  }
196 
197  /// \returns 8 when the OS is AMDHSA and 4 otherwise.
198  unsigned getAlignmentForImplicitArgPtr() const { return isAmdHsaOS() ? 8 : 4; }
200 
201  /// Returns the offset in bytes from the start of the input buffer
202  /// of the first explicit kernel argument.
203  unsigned getExplicitKernelArgOffset(const Function &F) const {
204  return isAmdHsaOrMesa(F) ? 0 : 36;
205  }
206 
207  /// \returns Maximum number of work groups per compute unit supported by the
208  /// subtarget and limited by given \p FlatWorkGroupSize.
209  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
210 
211  /// \returns Minimum flat work group size supported by the subtarget.
212  virtual unsigned getMinFlatWorkGroupSize() const = 0;
213 
214  /// \returns Maximum flat work group size supported by the subtarget.
215  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
216 
217  /// \returns Maximum number of waves per execution unit supported by the
218  /// subtarget and limited by given \p FlatWorkGroupSize.
219  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
220 
221  /// \returns Minimum number of waves per execution unit supported by the
222  /// subtarget.
223  virtual unsigned getMinWavesPerEU() const = 0;
224 
225  unsigned getMaxWavesPerEU() const { return 10; }
226 
227  /// Creates value range metadata on a workitemid.* intrinsic call or load.
228  bool makeLIDRangeMetadata(Instruction *I) const;
229 
230  /// \returns Number of bytes of arguments that are passed to a shader or
231  /// kernel in addition to the explicit ones declared for the function.
232  unsigned getImplicitArgNumBytes(const Function &F) const {
233  // Mesa kernels use a fixed 16; otherwise query the attribute (0 is presumably the default).
234  return isMesaKernel(F) ? 16
235  : AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
236  }
237  uint64_t getExplicitKernArgSize(const Function &F,
238  unsigned &MaxAlign) const;
239  unsigned getKernArgSegmentSize(const Function &F,
240  unsigned &MaxAlign) const;
241 
242  virtual ~AMDGPUSubtarget() = default; // defaulted; stays virtual for the derived subtarget classes
243 };
244 
246  public AMDGPUSubtarget {
247 public:
249  TrapHandlerAbiNone = 0,
250  TrapHandlerAbiHsa = 1
251  };
252 
253  enum TrapID {
254  TrapIDHardwareReserved = 0,
255  TrapIDHSADebugTrap = 1,
256  TrapIDLLVMTrap = 2,
257  TrapIDLLVMDebugTrap = 3,
258  TrapIDDebugBreakpoint = 7,
259  TrapIDDebugReserved8 = 8,
260  TrapIDDebugReservedFE = 0xfe,
261  TrapIDDebugReservedFF = 0xff
262  };
263 
265  LLVMTrapHandlerRegValue = 1
266  };
267 
268 private:
269  /// GlobalISel related APIs.
270  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
271  std::unique_ptr<InstructionSelector> InstSelector;
272  std::unique_ptr<LegalizerInfo> Legalizer;
273  std::unique_ptr<RegisterBankInfo> RegBankInfo;
274 
275 protected:
276  // Basic subtarget description.
278  unsigned Gen;
282 
283  // Possibly statically set by tablegen, but may want to be overridden.
286 
287  // Dynamically set bits that enable features.
297 
298  // Used as options.
305  bool DumpCode;
306 
307  // Subtarget properties statically set by tablegen
308  bool FP64;
309  bool FMA;
310  bool MIMG_R128;
311  bool IsGCN;
313  bool CIInsts;
314  bool GFX8Insts;
315  bool GFX9Insts;
321  bool HasMovrel;
330  bool HasDPP;
344  bool CaymanISA;
345  bool CFALUBug;
349 
350  // Dummy feature to use for assembler in tablegen.
352 
354 private:
355  SIInstrInfo InstrInfo;
356  SITargetLowering TLInfo;
357  SIFrameLowering FrameLowering;
358 
359 public:
360  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
361  const GCNTargetMachine &TM);
362  ~GCNSubtarget() override;
363 
364  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
365  StringRef GPU, StringRef FS);
366 
367  const SIInstrInfo *getInstrInfo() const override {
368  return &InstrInfo;
369  }
370 
371  const SIFrameLowering *getFrameLowering() const override {
372  return &FrameLowering;
373  }
374 
375  const SITargetLowering *getTargetLowering() const override {
376  return &TLInfo;
377  }
378 
379  const SIRegisterInfo *getRegisterInfo() const override {
380  return &InstrInfo.getRegisterInfo();
381  }
382 
383  const CallLowering *getCallLowering() const override {
384  return CallLoweringInfo.get();
385  }
386 
387  const InstructionSelector *getInstructionSelector() const override {
388  return InstSelector.get();
389  }
390 
391  const LegalizerInfo *getLegalizerInfo() const override {
392  return Legalizer.get();
393  }
394 
395  const RegisterBankInfo *getRegBankInfo() const override {
396  return RegBankInfo.get();
397  }
398 
399  // Nothing implemented, just prevent crashes on use.
400  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
401  return &TSInfo;
402  }
403 
404  const InstrItineraryData *getInstrItineraryData() const override {
405  return &InstrItins;
406  }
407 
408  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
409 
411  return (Generation)Gen;
412  }
413 
414  /// \returns Log2_32 applied to WavefrontSize.
415  unsigned getWavefrontSizeLog2() const { return Log2_32(WavefrontSize); }
417 
418  int getLDSBankCount() const {
419  return LDSBankCount;
420  }
421 
422  unsigned getMaxPrivateElementSize() const {
423  return MaxPrivateElementSize;
424  }
425 
426  bool hasIntClamp() const {
427  return HasIntClamp;
428  }
429 
430  bool hasFP64() const {
431  return FP64;
432  }
433 
434  bool hasMIMG_R128() const {
435  return MIMG_R128;
436  }
437 
438  bool hasHWFP64() const {
439  return FP64;
440  }
441 
442  bool hasFastFMAF32() const {
443  return FastFMAF32;
444  }
445 
446  bool hasHalfRate64Ops() const {
447  return HalfRate64Ops;
448  }
449 
450  bool hasAddr64() const {
451  return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
452  }
453 
454  bool hasBFE() const {
455  return true;
456  }
457 
458  bool hasBFI() const {
459  return true;
460  }
461 
462  /// BFM support is reported exactly as hasBFE() reports BFE support.
463  bool hasBFM() const { return hasBFE(); }
465 
466  /// Always true on this subtarget; \p Size is not consulted.
467  bool hasBCNT(unsigned Size) const { return true; }
469 
470  bool hasFFBL() const {
471  return true;
472  }
473 
474  bool hasFFBH() const {
475  return true;
476  }
477 
478  bool hasMed3_16() const {
479  return getGeneration() >= AMDGPUSubtarget::GFX9;
480  }
481 
482  bool hasMin3Max3_16() const {
483  return getGeneration() >= AMDGPUSubtarget::GFX9;
484  }
485 
486  bool hasFmaMixInsts() const {
487  return HasFmaMixInsts;
488  }
489 
490  bool hasCARRY() const {
491  return true;
492  }
493 
494  bool hasFMA() const {
495  return FMA;
496  }
497 
498  bool hasSwap() const {
499  return GFX9Insts;
500  }
501 
503  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
504  }
505 
506  bool enableHugePrivateBuffer() const {
507  return EnableHugePrivateBuffer;
508  }
509 
511  return EnableUnsafeDSOffsetFolding;
512  }
513 
514  bool dumpCode() const {
515  return DumpCode;
516  }
517 
518  /// Return the amount of LDS that can be used that will not restrict the
519  /// occupancy lower than WaveCount.
520  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
521  const Function &) const;
522 
523  bool hasFP16Denormals() const {
524  return FP64FP16Denormals;
525  }
526 
527  bool hasFP64Denormals() const {
528  return FP64FP16Denormals;
529  }
530 
532  return getGeneration() >= AMDGPUSubtarget::GFX9;
533  }
534 
535  bool useFlatForGlobal() const {
536  return FlatForGlobal;
537  }
538 
539  /// \returns If target supports ds_read/write_b128 and user enables generation
540  /// of ds_read/write_b128.
541  bool useDS128() const {
542  return CIInsts && EnableDS128;
543  }
544 
545  /// \returns If MUBUF instructions always perform range checking, even for
546  /// buffer resources used for private memory access.
548  return getGeneration() < AMDGPUSubtarget::GFX9;
549  }
550 
551  /// \returns If target requires PRT Strict NULL support (zero result registers
552  /// for sparse texture support).
553  bool usePRTStrictNull() const {
554  return EnablePRTStrictNull;
555  }
556 
558  return AutoWaitcntBeforeBarrier;
559  }
560 
561  bool hasCodeObjectV3() const {
562  // Only AMDHSA can report v3. FIXME: Need to add code object v3 support for mesa and pal.
563  return isAmdHsaOS() && CodeObjectV3;
564  }
565 
567  return UnalignedBufferAccess;
568  }
569 
571  return UnalignedScratchAccess;
572  }
573 
574  bool hasApertureRegs() const {
575  return HasApertureRegs;
576  }
577 
578  bool isTrapHandlerEnabled() const {
579  return TrapHandler;
580  }
581 
582  bool isXNACKEnabled() const {
583  return EnableXNACK;
584  }
585 
586  bool hasFlatAddressSpace() const {
587  return FlatAddressSpace;
588  }
589 
590  bool hasFlatInstOffsets() const {
591  return FlatInstOffsets;
592  }
593 
594  bool hasFlatGlobalInsts() const {
595  return FlatGlobalInsts;
596  }
597 
598  bool hasFlatScratchInsts() const {
599  return FlatScratchInsts;
600  }
601 
603  return getGeneration() > GFX9;
604  }
605 
606  bool hasD16LoadStore() const {
607  return getGeneration() >= GFX9;
608  }
609 
610  bool d16PreservesUnusedBits() const {
611  return hasD16LoadStore() && !isSRAMECCEnabled();
612  }
613 
614  /// Return if most LDS instructions have an m0 use that requires m0 to be
615  /// initialized.
616  bool ldsRequiresM0Init() const {
617  return getGeneration() < GFX9;
618  }
619 
620  bool hasAddNoCarry() const {
621  return AddNoCarryInsts;
622  }
623 
624  bool hasUnpackedD16VMem() const {
625  return HasUnpackedD16VMem;
626  }
627 
628  // Covers VS/PS/CS graphics shaders
629  bool isMesaGfxShader(const Function &F) const {
630  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
631  }
632 
633  bool hasMad64_32() const {
634  return getGeneration() >= SEA_ISLANDS;
635  }
636 
637  bool hasSDWAOmod() const {
638  return HasSDWAOmod;
639  }
640 
641  bool hasSDWAScalar() const {
642  return HasSDWAScalar;
643  }
644 
645  bool hasSDWASdst() const {
646  return HasSDWASdst;
647  }
648 
649  bool hasSDWAMac() const {
650  return HasSDWAMac;
651  }
652 
653  bool hasSDWAOutModsVOPC() const {
654  return HasSDWAOutModsVOPC;
655  }
656 
658  return getGeneration() < SEA_ISLANDS;
659  }
660 
661  bool hasDLInsts() const {
662  return HasDLInsts;
663  }
664 
665  bool hasDot1Insts() const {
666  return HasDot1Insts;
667  }
668 
669  bool hasDot2Insts() const {
670  return HasDot2Insts;
671  }
672 
673  bool isSRAMECCEnabled() const {
674  return EnableSRAMECC;
675  }
676 
677  // Scratch is allocated in 256 dword per wave blocks for the entire
678  // wavefront. When viewed from the perspective of an arbitrary workitem, this
679  // is 4-byte aligned.
680  //
681  // Only 4-byte alignment is really needed to access anything. Transformations
682  // on the pointer value itself may rely on the alignment / known low bits of
683  // the pointer. Set this to something above the minimum to avoid needing
684  // dynamic realignment in common cases.
685  unsigned getStackAlignment() const {
686  return 16;
687  }
688 
689  /// Always reports true.
690  bool enableMachineScheduler() const override { return true; }
692 
693  /// Always reports true.
694  bool enableSubRegLiveness() const override { return true; }
696 
697  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
699 
700  /// \returns Number of execution units per compute unit supported by the
701  /// subtarget.
702  unsigned getEUsPerCU() const {
703  return AMDGPU::IsaInfo::getEUsPerCU(this);
704  }
705 
706  /// \returns Maximum number of waves per compute unit supported by the
707  /// subtarget without any kind of limitation.
708  unsigned getMaxWavesPerCU() const {
710  }
711 
712  /// \returns Maximum number of waves per compute unit supported by the
713  /// subtarget and limited by given \p FlatWorkGroupSize.
714  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
715  return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
716  }
717 
718  /// \returns Maximum number of waves per execution unit supported by the
719  /// subtarget without any kind of limitation.
720  unsigned getMaxWavesPerEU() const {
722  }
723 
724  /// \returns Number of waves per work group supported by the subtarget and
725  /// limited by given \p FlatWorkGroupSize.
726  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
727  return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
728  }
729 
730  // static wrappers
731  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
732 
733  // XXX - Why is this here if it isn't in the default pass set?
734  bool enableEarlyIfConversion() const override {
735  return true;
736  }
737 
738  void overrideSchedPolicy(MachineSchedPolicy &Policy,
739  unsigned NumRegionInstrs) const override;
740 
741  /// \returns the fixed limit of 16.
742  unsigned getMaxNumUserSGPRs() const { return 16; }
744 
745  bool hasSMemRealTime() const {
746  return HasSMemRealTime;
747  }
748 
749  bool hasMovrel() const {
750  return HasMovrel;
751  }
752 
753  bool hasVGPRIndexMode() const {
754  return HasVGPRIndexMode;
755  }
756 
757  bool useVGPRIndexMode(bool UserEnable) const {
758  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
759  }
760 
761  bool hasScalarCompareEq64() const {
762  return getGeneration() >= VOLCANIC_ISLANDS;
763  }
764 
765  bool hasScalarStores() const {
766  return HasScalarStores;
767  }
768 
769  bool hasScalarAtomics() const {
770  return HasScalarAtomics;
771  }
772 
773  bool hasLDSFPAtomics() const {
774  return GFX8Insts;
775  }
776 
777  bool hasDPP() const {
778  return HasDPP;
779  }
780 
781  bool hasR128A16() const {
782  return HasR128A16;
783  }
784 
785  bool enableSIScheduler() const {
786  return EnableSIScheduler;
787  }
788 
789  bool loadStoreOptEnabled() const {
790  return EnableLoadStoreOpt;
791  }
792 
793  bool hasSGPRInitBug() const {
794  return SGPRInitBug;
795  }
796 
797  bool has12DWordStoreHazard() const {
798  return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
799  }
800 
801  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
802  bool hasDwordx3LoadStores() const {
803  return CIInsts;
804  }
805 
806  bool hasSMovFedHazard() const {
807  return getGeneration() == AMDGPUSubtarget::GFX9;
808  }
809 
811  return getGeneration() == AMDGPUSubtarget::GFX9;
812  }
813 
814  bool hasReadM0SendMsgHazard() const {
815  const auto G = getGeneration(); // read once, then test the inclusive range
816  return G >= AMDGPUSubtarget::VOLCANIC_ISLANDS && G <= AMDGPUSubtarget::GFX9;
817  }
818 
819  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
820  /// SGPRs
821  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
822 
823  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
824  /// VGPRs
825  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
826 
827  /// \returns true if the flat_scratch register should be initialized with the
828  /// pointer to the wave's scratch memory rather than a size and offset.
829  bool flatScratchIsPointer() const {
830  return getGeneration() >= AMDGPUSubtarget::GFX9;
831  }
832 
833  /// \returns true if the machine has merged shaders in which s0-s7 are
834  /// reserved by the hardware and user SGPRs start at s8
835  bool hasMergedShaders() const {
836  return getGeneration() >= GFX9;
837  }
838 
839  /// \returns SGPR allocation granularity supported by the subtarget.
840  unsigned getSGPRAllocGranule() const {
842  }
843 
844  /// \returns SGPR encoding granularity supported by the subtarget.
845  unsigned getSGPREncodingGranule() const {
847  }
848 
849  /// \returns Total number of SGPRs supported by the subtarget.
850  unsigned getTotalNumSGPRs() const {
852  }
853 
854  /// \returns Addressable number of SGPRs supported by the subtarget.
855  unsigned getAddressableNumSGPRs() const {
857  }
858 
859  /// \returns Minimum number of SGPRs that meets the given number of waves per
860  /// execution unit requirement supported by the subtarget.
861  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
862  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
863  }
864 
865  /// \returns Maximum number of SGPRs that meets the given number of waves per
866  /// execution unit requirement supported by the subtarget.
867  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
868  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
869  }
870 
871  /// \returns Reserved number of SGPRs for given function \p MF.
872  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
873 
874  /// \returns Maximum number of SGPRs that meets number of waves per execution
875  /// unit requirement for function \p MF, or number of SGPRs explicitly
876  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
877  ///
878  /// \returns Value that meets number of waves per execution unit requirement
879  /// if explicitly requested value cannot be converted to integer, violates
880  /// subtarget's specifications, or does not meet number of waves per execution
881  /// unit requirement.
882  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
883 
884  /// \returns VGPR allocation granularity supported by the subtarget.
885  unsigned getVGPRAllocGranule() const {
887  }
888 
889  /// \returns VGPR encoding granularity supported by the subtarget.
890  unsigned getVGPREncodingGranule() const {
892  }
893 
894  /// \returns Total number of VGPRs supported by the subtarget.
895  unsigned getTotalNumVGPRs() const {
897  }
898 
899  /// \returns Addressable number of VGPRs supported by the subtarget.
900  unsigned getAddressableNumVGPRs() const {
902  }
903 
904  /// \returns Minimum number of VGPRs that meets given number of waves per
905  /// execution unit requirement supported by the subtarget.
906  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
907  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
908  }
909 
910  /// \returns Maximum number of VGPRs that meets given number of waves per
911  /// execution unit requirement supported by the subtarget.
912  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
913  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
914  }
915 
916  /// \returns Maximum number of VGPRs that meets number of waves per execution
917  /// unit requirement for function \p MF, or number of VGPRs explicitly
918  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
919  ///
920  /// \returns Value that meets number of waves per execution unit requirement
921  /// if explicitly requested value cannot be converted to integer, violates
922  /// subtarget's specifications, or does not meet number of waves per execution
923  /// unit requirement.
924  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
925 
926  void getPostRAMutations(
927  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
928  const override;
929 
930  /// \returns Maximum number of work groups per compute unit supported by the
931  /// subtarget and limited by given \p FlatWorkGroupSize.
932  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
933  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
934  }
935 
936  /// \returns Minimum flat work group size supported by the subtarget.
937  unsigned getMinFlatWorkGroupSize() const override {
939  }
940 
941  /// \returns Maximum flat work group size supported by the subtarget.
942  unsigned getMaxFlatWorkGroupSize() const override {
944  }
945 
946  /// \returns Maximum number of waves per execution unit supported by the
947  /// subtarget and limited by given \p FlatWorkGroupSize.
948  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
949  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
950  }
951 
952  /// \returns Minimum number of waves per execution unit supported by the
953  /// subtarget.
954  unsigned getMinWavesPerEU() const override {
956  }
957 };
958 
959 class R600Subtarget final : public R600GenSubtargetInfo,
960  public AMDGPUSubtarget {
961 private:
962  R600InstrInfo InstrInfo;
963  R600FrameLowering FrameLowering;
964  bool FMA;
965  bool CaymanISA;
966  bool CFALUBug;
967  bool HasVertexCache;
968  bool R600ALUInst;
969  bool FP64;
970  short TexVTXClauseSize;
971  Generation Gen;
972  R600TargetLowering TLInfo;
973  InstrItineraryData InstrItins;
974  SelectionDAGTargetInfo TSInfo;
975 
976 public:
977  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
978  const TargetMachine &TM);
979 
980  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
981 
982  const R600FrameLowering *getFrameLowering() const override {
983  return &FrameLowering;
984  }
985 
986  const R600TargetLowering *getTargetLowering() const override {
987  return &TLInfo;
988  }
989 
990  const R600RegisterInfo *getRegisterInfo() const override {
991  return &InstrInfo.getRegisterInfo();
992  }
993 
994  const InstrItineraryData *getInstrItineraryData() const override {
995  return &InstrItins;
996  }
997 
998  // Nothing implemented, just prevent crashes on use.
999  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
1000  return &TSInfo;
1001  }
1002 
1003  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1004 
1006  return Gen;
1007  }
1008 
1009  /// \returns the fixed value 4.
1010  unsigned getStackAlignment() const { return 4; }
1012 
1013  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1014  StringRef GPU, StringRef FS);
1015 
1016  /// \returns true for generations >= EVERGREEN.
1017  bool hasBFE() const { return getGeneration() >= EVERGREEN; }
1019 
1020  /// \returns true for generations >= EVERGREEN.
1021  bool hasBFI() const { return getGeneration() >= EVERGREEN; }
1023 
1024  /// True only when \p Size is 32 and the generation is >= EVERGREEN.
1025  bool hasBCNT(unsigned Size) const {
1026  return Size == 32 && getGeneration() >= EVERGREEN;
1027  }
1030 
1031  /// \returns true for generations >= EVERGREEN.
1032  bool hasBORROW() const { return getGeneration() >= EVERGREEN; }
1034 
1035  /// \returns true for generations >= EVERGREEN.
1036  bool hasCARRY() const { return getGeneration() >= EVERGREEN; }
1038 
1039  /// \returns the CaymanISA flag.
1040  bool hasCaymanISA() const { return CaymanISA; }
1042 
1043  /// \returns true for generations >= EVERGREEN.
1044  bool hasFFBL() const { return getGeneration() >= EVERGREEN; }
1046 
1047  /// \returns true for generations >= EVERGREEN.
1048  bool hasFFBH() const { return getGeneration() >= EVERGREEN; }
1050 
1051  bool hasFMA() const { return FMA; }
1052 
1053  bool hasCFAluBug() const { return CFALUBug; }
1054 
1055  bool hasVertexCache() const { return HasVertexCache; }
1056 
1057  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1058 
1059  /// Always reports true.
1060  bool enableMachineScheduler() const override { return true; }
1062 
1063  /// Always reports true.
1064  bool enableSubRegLiveness() const override { return true; }
1066 
1067  /// \returns Maximum number of work groups per compute unit supported by the
1068  /// subtarget and limited by given \p FlatWorkGroupSize.
1069  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1070  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1071  }
1072 
1073  /// \returns Minimum flat work group size supported by the subtarget.
1074  unsigned getMinFlatWorkGroupSize() const override {
1076  }
1077 
1078  /// \returns Maximum flat work group size supported by the subtarget.
1079  unsigned getMaxFlatWorkGroupSize() const override {
1081  }
1082 
1083  /// \returns Maximum number of waves per execution unit supported by the
1084  /// subtarget and limited by given \p FlatWorkGroupSize.
1085  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1086  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1087  }
1088 
1089  /// \returns Minimum number of waves per execution unit supported by the
1090  /// subtarget.
1091  unsigned getMinWavesPerEU() const override {
1092  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1093  }
1094 };
1095 
1096 } // end namespace llvm
1097 
1098 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasSDWAOmod() const
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
bool hasApertureRegs() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool isMesaKernel(const Function &F) const
unsigned getMinFlatWorkGroupSize() const override
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:300
bool isPromoteAllocaEnabled() const
bool d16PreservesUnusedBits() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getSGPRAllocGranule() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasTrigReducedRange() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
virtual unsigned getMinWavesPerEU() const =0
F(f)
InstrItineraryData InstrItins
unsigned getMaxWavesPerEU() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasFastFMAF32() const
Generation getGeneration() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:165
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasFP64Denormals() const
Holds all the information related to register banks.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
bool hasDwordx3LoadStores() const
bool hasIntClamp() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
bool isTrapHandlerEnabled() const
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasSMemRealTime() const
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasHalfRate64Ops() const
bool useFlatForGlobal() const
unsigned getAddressableNumSGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinWavesPerEU() const override
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
Itinerary data supplied by a subtarget to be used by a target.
bool hasAddNoCarry() const
const CallLowering * getCallLowering() const override
virtual unsigned getMinFlatWorkGroupSize() const =0
bool dumpCode() const
bool isSRAMECCEnabled() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
bool hasDot2Insts() const
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMaxWavesPerCU() const
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
bool hasCFAluBug() const
unsigned getStackAlignment() const
bool hasFminFmaxLegacy() const
bool hasDLInsts() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasSwap() const
bool hasMovrel() const
unsigned MaxPrivateElementSize
bool usePRTStrictNull() const
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
bool hasLDSFPAtomics() const
const R600InstrInfo * getInstrInfo() const override
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:71
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasScalarAtomics() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI)
bool enableHugePrivateBuffer() const
bool enableSIScheduler() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:212
bool hasMadMixInsts() const
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
unsigned getAlignmentForImplicitArgPtr() const
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasMed3_16() const
int getLDSBankCount() const
bool hasBCNT(unsigned Size) const
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI)
const InstructionSelector * getInstructionSelector() const override
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool enableSubRegLiveness() const override
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
R600 DAG Lowering interface definition.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(const Triple &TT)
unsigned getTotalNumVGPRs() const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:46
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getStackAlignment() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool hasSDWAScalar() const
const InstrItineraryData * getInstrItineraryData() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
bool hasDot1Insts() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:301
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasR128A16() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override