LLVM  9.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "AMDGPU.h"
18 #include "AMDGPUCallLowering.h"
19 #include "R600FrameLowering.h"
20 #include "R600ISelLowering.h"
21 #include "R600InstrInfo.h"
22 #include "SIFrameLowering.h"
23 #include "SIISelLowering.h"
24 #include "SIInstrInfo.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/ADT/Triple.h"
34 #include <cassert>
35 #include <cstdint>
36 #include <memory>
37 #include <utility>
38 
39 #define GET_SUBTARGETINFO_HEADER
40 #include "AMDGPUGenSubtargetInfo.inc"
41 #define GET_SUBTARGETINFO_HEADER
42 #include "R600GenSubtargetInfo.inc"
43 
44 namespace llvm {
45 
46 class StringRef;
47 
49 public:
50  enum Generation {
51  R600 = 0,
52  R700 = 1,
53  EVERGREEN = 2,
58  GFX9 = 7
59  };
60 
61 private:
62  Triple TargetTriple;
63 
64 protected:
69  bool HasSDWA;
71  bool HasMulI24;
72  bool HasMulU24;
78  unsigned WavefrontSize;
79 
80 public:
81  AMDGPUSubtarget(const Triple &TT);
82 
83  static const AMDGPUSubtarget &get(const MachineFunction &MF);
84  static const AMDGPUSubtarget &get(const TargetMachine &TM,
85  const Function &F);
86 
87  /// \returns Default flat work group size range for calling convention \p CC.
88  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
89 
90  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
91  /// for function \p F, or minimum/maximum flat work group sizes explicitly
92  /// requested using "amdgpu-flat-work-group-size" attribute attached to
93  /// function \p F.
94  ///
95  /// \returns Subtarget's default values if explicitly requested values cannot
96  /// be converted to integer, or violate subtarget's specifications.
97  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
98 
99  /// \returns Subtarget's default pair of minimum/maximum number of waves per
100  /// execution unit for function \p F, or minimum/maximum number of waves per
101  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
102  /// attached to function \p F.
103  ///
104  /// \returns Subtarget's default values if explicitly requested values cannot
105  /// be converted to integer, violate subtarget's specifications, or are not
106  /// compatible with minimum/maximum number of waves limited by flat work group
107  /// size, register usage, and/or lds usage.
108  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
109 
110  /// Return the amount of LDS that can be used that will not restrict the
111  /// occupancy lower than WaveCount.
112  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
113  const Function &) const;
114 
115  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wave count
116  /// if the given LDS memory size is the only constraint.
117  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
118 
119  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
120 
121  bool isAmdHsaOS() const {
122  return TargetTriple.getOS() == Triple::AMDHSA;
123  }
124 
125  bool isAmdPalOS() const {
126  return TargetTriple.getOS() == Triple::AMDPAL;
127  }
128 
129  bool isMesa3DOS() const {
130  return TargetTriple.getOS() == Triple::Mesa3D;
131  }
132 
133  bool isMesaKernel(const Function &F) const {
134  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv()); // Mesa3D OS and F's calling convention is not a shader one.
135  }
136 
137  bool isAmdHsaOrMesa(const Function &F) const {
138  return isAmdHsaOS() || isMesaKernel(F); // AMDHSA OS, or a Mesa kernel as decided by isMesaKernel(F).
139  }
140 
141  bool has16BitInsts() const {
142  return Has16BitInsts;
143  }
144 
145  bool hasMadMixInsts() const {
146  return HasMadMixInsts;
147  }
148 
149  bool hasFP32Denormals() const {
150  return FP32Denormals;
151  }
152 
153  bool hasFPExceptions() const {
154  return FPExceptions;
155  }
156 
157  bool hasSDWA() const {
158  return HasSDWA;
159  }
160 
161  bool hasVOP3PInsts() const {
162  return HasVOP3PInsts;
163  }
164 
165  bool hasMulI24() const {
166  return HasMulI24;
167  }
168 
169  bool hasMulU24() const {
170  return HasMulU24;
171  }
172 
173  bool hasInv2PiInlineImm() const {
174  return HasInv2PiInlineImm;
175  }
176 
177  bool hasFminFmaxLegacy() const {
178  return HasFminFmaxLegacy;
179  }
180 
181  bool hasTrigReducedRange() const {
182  return HasTrigReducedRange;
183  }
184 
185  bool isPromoteAllocaEnabled() const {
186  return EnablePromoteAlloca;
187  }
188 
189  unsigned getWavefrontSize() const {
190  return WavefrontSize;
191  }
192 
193  int getLocalMemorySize() const {
194  return LocalMemorySize;
195  }
196 
197  unsigned getAlignmentForImplicitArgPtr() const {
198  return isAmdHsaOS() ? 8 : 4; // 8 on AMDHSA, 4 on every other OS.
199  }
200 
201  /// Returns the offset in bytes from the start of the input buffer
202  /// of the first explicit kernel argument.
203  unsigned getExplicitKernelArgOffset(const Function &F) const {
204  return isAmdHsaOrMesa(F) ? 0 : 36; // 0 for AMDHSA or Mesa kernels; 36 bytes for everything else.
205  }
206 
207  /// \returns Maximum number of work groups per compute unit supported by the
208  /// subtarget and limited by given \p FlatWorkGroupSize.
209  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
210 
211  /// \returns Minimum flat work group size supported by the subtarget.
212  virtual unsigned getMinFlatWorkGroupSize() const = 0;
213 
214  /// \returns Maximum flat work group size supported by the subtarget.
215  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
216 
217  /// \returns Maximum number of waves per execution unit supported by the
218  /// subtarget and limited by given \p FlatWorkGroupSize.
219  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
220 
221  /// \returns Minimum number of waves per execution unit supported by the
222  /// subtarget.
223  virtual unsigned getMinWavesPerEU() const = 0;
224 
225  unsigned getMaxWavesPerEU() const { return 10; } // Non-virtual overload without FlatWorkGroupSize: always the constant 10.
226 
227  /// Creates value range metadata on a workitemid.* intrinsic call or load.
228  bool makeLIDRangeMetadata(Instruction *I) const;
229 
230  /// \returns Number of bytes of arguments that are passed to a shader or
231  /// kernel in addition to the explicit ones declared for the function.
232  unsigned getImplicitArgNumBytes(const Function &F) const {
233  if (isMesaKernel(F))
234  return 16; // Mesa kernels always report 16 bytes.
235  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0); // Otherwise taken from the function attribute; the trailing 0 is presumably the fallback -- confirm in AMDGPUBaseInfo.
236  }
237  uint64_t getExplicitKernArgSize(const Function &F,
238  unsigned &MaxAlign) const;
239  unsigned getKernArgSegmentSize(const Function &F,
240  unsigned &MaxAlign) const;
241 
242  virtual ~AMDGPUSubtarget() = default; // Virtual: this class declares pure virtual methods and is used as a public base.
243 };
244 
246  public AMDGPUSubtarget {
247 public:
249  TrapHandlerAbiNone = 0,
250  TrapHandlerAbiHsa = 1
251  };
252 
253  enum TrapID {
254  TrapIDHardwareReserved = 0,
255  TrapIDHSADebugTrap = 1,
256  TrapIDLLVMTrap = 2,
257  TrapIDLLVMDebugTrap = 3,
258  TrapIDDebugBreakpoint = 7,
259  TrapIDDebugReserved8 = 8,
260  TrapIDDebugReservedFE = 0xfe,
261  TrapIDDebugReservedFF = 0xff
262  };
263 
265  LLVMTrapHandlerRegValue = 1
266  };
267 
268 private:
269  /// GlobalISel related APIs.
270  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
271  std::unique_ptr<InstructionSelector> InstSelector;
272  std::unique_ptr<LegalizerInfo> Legalizer;
273  std::unique_ptr<RegisterBankInfo> RegBankInfo;
274 
275 protected:
276  // Basic subtarget description.
278  unsigned Gen;
282 
283  // Possibly statically set by tablegen, but may want to be overridden.
286 
287  // Dynamically set bits that enable features.
289  bool DX10Clamp;
300 
301  // Used as options.
308  bool DumpCode;
309 
310  // Subtarget properties statically set by tablegen
311  bool FP64;
312  bool FMA;
313  bool MIMG_R128;
314  bool IsGCN;
316  bool CIInsts;
317  bool VIInsts;
318  bool GFX9Insts;
323  bool HasMovrel;
332  bool HasDPP;
345  bool CaymanISA;
346  bool CFALUBug;
350 
351  // Dummy feature to use for assembler in tablegen.
353 
355 private:
356  SIInstrInfo InstrInfo;
357  SITargetLowering TLInfo;
358  SIFrameLowering FrameLowering;
359 
360 public:
361  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
362  const GCNTargetMachine &TM);
363  ~GCNSubtarget() override;
364 
365  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
366  StringRef GPU, StringRef FS);
367 
368  const SIInstrInfo *getInstrInfo() const override {
369  return &InstrInfo;
370  }
371 
372  const SIFrameLowering *getFrameLowering() const override {
373  return &FrameLowering;
374  }
375 
376  const SITargetLowering *getTargetLowering() const override {
377  return &TLInfo;
378  }
379 
380  const SIRegisterInfo *getRegisterInfo() const override {
381  return &InstrInfo.getRegisterInfo();
382  }
383 
384  const CallLowering *getCallLowering() const override {
385  return CallLoweringInfo.get();
386  }
387 
388  const InstructionSelector *getInstructionSelector() const override {
389  return InstSelector.get();
390  }
391 
392  const LegalizerInfo *getLegalizerInfo() const override {
393  return Legalizer.get();
394  }
395 
396  const RegisterBankInfo *getRegBankInfo() const override {
397  return RegBankInfo.get();
398  }
399 
400  // Nothing implemented, just prevent crashes on use.
401  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
402  return &TSInfo;
403  }
404 
405  const InstrItineraryData *getInstrItineraryData() const override {
406  return &InstrItins;
407  }
408 
409  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
410 
412  return (Generation)Gen;
413  }
414 
415  unsigned getWavefrontSizeLog2() const {
416  return Log2_32(WavefrontSize); // NOTE(review): exact only if WavefrontSize is a power of two -- confirm Log2_32 rounding.
417  }
418 
419  int getLDSBankCount() const {
420  return LDSBankCount;
421  }
422 
423  unsigned getMaxPrivateElementSize() const {
424  return MaxPrivateElementSize;
425  }
426 
427  bool hasIntClamp() const {
428  return HasIntClamp;
429  }
430 
431  bool hasFP64() const {
432  return FP64;
433  }
434 
435  bool hasMIMG_R128() const {
436  return MIMG_R128;
437  }
438 
439  bool hasHWFP64() const {
440  return FP64; // Same FP64 flag that hasFP64() returns.
441  }
442 
443  bool hasFastFMAF32() const {
444  return FastFMAF32;
445  }
446 
447  bool hasHalfRate64Ops() const {
448  return HalfRate64Ops;
449  }
450 
451  bool hasAddr64() const {
452  return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); // Only generations before VOLCANIC_ISLANDS.
453  }
454 
455  bool hasBFE() const {
456  return true;
457  }
458 
459  bool hasBFI() const {
460  return true;
461  }
462 
463  bool hasBFM() const {
464  return hasBFE(); // Reported exactly when hasBFE() is.
465  }
466 
467  bool hasBCNT(unsigned Size) const {
468  return true; // Size is ignored in this class: always true.
469  }
470 
471  bool hasFFBL() const {
472  return true;
473  }
474 
475  bool hasFFBH() const {
476  return true;
477  }
478 
479  bool hasMed3_16() const {
480  return getGeneration() >= AMDGPUSubtarget::GFX9;
481  }
482 
483  bool hasMin3Max3_16() const {
484  return getGeneration() >= AMDGPUSubtarget::GFX9;
485  }
486 
487  bool hasFmaMixInsts() const {
488  return HasFmaMixInsts;
489  }
490 
491  bool hasCARRY() const {
492  return true;
493  }
494 
495  bool hasFMA() const {
496  return FMA;
497  }
498 
499  bool hasSwap() const {
500  return GFX9Insts;
501  }
502 
504  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
505  }
506 
507  bool enableHugePrivateBuffer() const {
508  return EnableHugePrivateBuffer;
509  }
510 
512  return EnableUnsafeDSOffsetFolding;
513  }
514 
515  bool dumpCode() const {
516  return DumpCode;
517  }
518 
519  /// Return the amount of LDS that can be used that will not restrict the
520  /// occupancy lower than WaveCount.
521  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
522  const Function &) const;
523 
524  bool hasFP16Denormals() const {
525  return FP64FP16Denormals; // One flag covers both FP16 and FP64; hasFP64Denormals() reads it too.
526  }
527 
528  bool hasFP64Denormals() const {
529  return FP64FP16Denormals; // One flag covers both FP64 and FP16; hasFP16Denormals() reads it too.
530  }
531 
533  return getGeneration() >= AMDGPUSubtarget::GFX9;
534  }
535 
536  bool enableDX10Clamp() const {
537  return DX10Clamp;
538  }
539 
540  bool enableIEEEBit(const MachineFunction &MF) const {
542  }
543 
544  bool useFlatForGlobal() const {
545  return FlatForGlobal;
546  }
547 
548  /// \returns true if the target supports ds_read/write_b128 and the user
549  /// enables generation of ds_read/write_b128.
550  bool useDS128() const {
551  return CIInsts && EnableDS128;
552  }
553 
554  /// \returns true if MUBUF instructions always perform range checking, even
555  /// for buffer resources used for private memory access.
557  return getGeneration() < AMDGPUSubtarget::GFX9;
558  }
559 
560  /// \returns true if the target requires PRT Strict NULL support (zero result
561  /// registers for sparse texture support).
562  bool usePRTStrictNull() const {
563  return EnablePRTStrictNull;
564  }
565 
567  return AutoWaitcntBeforeBarrier;
568  }
569 
570  bool hasCodeObjectV3() const {
571  // FIXME: Need to add code object v3 support for mesa and pal.
572  return isAmdHsaOS() ? CodeObjectV3 : false; // Non-AMDHSA targets always report false, whatever CodeObjectV3 holds.
573  }
574 
576  return UnalignedBufferAccess;
577  }
578 
580  return UnalignedScratchAccess;
581  }
582 
583  bool hasApertureRegs() const {
584  return HasApertureRegs;
585  }
586 
587  bool isTrapHandlerEnabled() const {
588  return TrapHandler;
589  }
590 
591  bool isXNACKEnabled() const {
592  return EnableXNACK;
593  }
594 
595  bool hasFlatAddressSpace() const {
596  return FlatAddressSpace;
597  }
598 
599  bool hasFlatInstOffsets() const {
600  return FlatInstOffsets;
601  }
602 
603  bool hasFlatGlobalInsts() const {
604  return FlatGlobalInsts;
605  }
606 
607  bool hasFlatScratchInsts() const {
608  return FlatScratchInsts;
609  }
610 
612  return getGeneration() > GFX9;
613  }
614 
615  bool hasD16LoadStore() const {
616  return getGeneration() >= GFX9;
617  }
618 
619  /// Return true if most LDS instructions have an m0 use that requires m0 to
620  /// be initialized.
621  bool ldsRequiresM0Init() const {
622  return getGeneration() < GFX9;
623  }
624 
625  bool hasAddNoCarry() const {
626  return AddNoCarryInsts;
627  }
628 
629  bool hasUnpackedD16VMem() const {
630  return HasUnpackedD16VMem;
631  }
632 
633  // Covers VS/PS/CS graphics shaders
634  bool isMesaGfxShader(const Function &F) const {
635  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
636  }
637 
638  bool hasMad64_32() const {
639  return getGeneration() >= SEA_ISLANDS;
640  }
641 
642  bool hasSDWAOmod() const {
643  return HasSDWAOmod;
644  }
645 
646  bool hasSDWAScalar() const {
647  return HasSDWAScalar;
648  }
649 
650  bool hasSDWASdst() const {
651  return HasSDWASdst;
652  }
653 
654  bool hasSDWAMac() const {
655  return HasSDWAMac;
656  }
657 
658  bool hasSDWAOutModsVOPC() const {
659  return HasSDWAOutModsVOPC;
660  }
661 
663  return getGeneration() < SEA_ISLANDS;
664  }
665 
666  bool hasDLInsts() const {
667  return HasDLInsts;
668  }
669 
670  bool hasDot1Insts() const {
671  return HasDot1Insts;
672  }
673 
674  bool hasDot2Insts() const {
675  return HasDot2Insts;
676  }
677 
678  bool isSRAMECCEnabled() const {
679  return EnableSRAMECC;
680  }
681 
682  // Scratch is allocated in 256 dword per wave blocks for the entire
683  // wavefront. When viewed from the perspective of an arbitrary workitem, this
684  // is 4-byte aligned.
685  //
686  // Only 4-byte alignment is really needed to access anything. Transformations
687  // on the pointer value itself may rely on the alignment / known low bits of
688  // the pointer. Set this to something above the minimum to avoid needing
689  // dynamic realignment in common cases.
690  unsigned getStackAlignment() const {
691  return 16;
692  }
693 
694  bool enableMachineScheduler() const override {
695  return true;
696  }
697 
698  bool enableSubRegLiveness() const override {
699  return true;
700  }
701 
702  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
704 
705  /// \returns Number of execution units per compute unit supported by the
706  /// subtarget.
707  unsigned getEUsPerCU() const {
708  return AMDGPU::IsaInfo::getEUsPerCU(this);
709  }
710 
711  /// \returns Maximum number of waves per compute unit supported by the
712  /// subtarget without any kind of limitation.
713  unsigned getMaxWavesPerCU() const {
715  }
716 
717  /// \returns Maximum number of waves per compute unit supported by the
718  /// subtarget and limited by given \p FlatWorkGroupSize.
719  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
720  return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
721  }
722 
723  /// \returns Maximum number of waves per execution unit supported by the
724  /// subtarget without any kind of limitation.
725  unsigned getMaxWavesPerEU() const {
727  }
728 
729  /// \returns Number of waves per work group supported by the subtarget and
730  /// limited by given \p FlatWorkGroupSize.
731  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
732  return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
733  }
734 
735  // static wrappers
736  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
737 
738  // XXX - Why is this here if it isn't in the default pass set?
739  bool enableEarlyIfConversion() const override {
740  return true;
741  }
742 
743  void overrideSchedPolicy(MachineSchedPolicy &Policy,
744  unsigned NumRegionInstrs) const override;
745 
746  unsigned getMaxNumUserSGPRs() const {
747  return 16;
748  }
749 
750  bool hasSMemRealTime() const {
751  return HasSMemRealTime;
752  }
753 
754  bool hasMovrel() const {
755  return HasMovrel;
756  }
757 
758  bool hasVGPRIndexMode() const {
759  return HasVGPRIndexMode;
760  }
761 
762  bool useVGPRIndexMode(bool UserEnable) const {
763  return !hasMovrel() || (UserEnable && hasVGPRIndexMode()); // True without movrel; with movrel, only when UserEnable is set and hasVGPRIndexMode() holds.
764  }
765 
766  bool hasScalarCompareEq64() const {
767  return getGeneration() >= VOLCANIC_ISLANDS;
768  }
769 
770  bool hasScalarStores() const {
771  return HasScalarStores;
772  }
773 
774  bool hasScalarAtomics() const {
775  return HasScalarAtomics;
776  }
777 
778  bool hasLDSFPAtomics() const {
779  return VIInsts;
780  }
781 
782  bool hasDPP() const {
783  return HasDPP;
784  }
785 
786  bool hasR128A16() const {
787  return HasR128A16;
788  }
789 
790  bool enableSIScheduler() const {
791  return EnableSIScheduler;
792  }
793 
794  bool debuggerSupported() const {
795  return debuggerInsertNops() && debuggerEmitPrologue(); // Requires both debugger options at once.
796  }
797 
798  bool debuggerInsertNops() const {
799  return DebuggerInsertNops;
800  }
801 
802  bool debuggerEmitPrologue() const {
803  return DebuggerEmitPrologue;
804  }
805 
806  bool loadStoreOptEnabled() const {
807  return EnableLoadStoreOpt;
808  }
809 
810  bool hasSGPRInitBug() const {
811  return SGPRInitBug;
812  }
813 
814  bool has12DWordStoreHazard() const {
815  return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; // Every generation except SOUTHERN_ISLANDS.
816  }
817 
818  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
819  bool hasDwordx3LoadStores() const {
820  return CIInsts;
821  }
822 
823  bool hasSMovFedHazard() const {
824  return getGeneration() >= AMDGPUSubtarget::GFX9;
825  }
826 
828  return getGeneration() >= AMDGPUSubtarget::GFX9;
829  }
830 
831  bool hasReadM0SendMsgHazard() const {
832  return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
833  }
834 
835  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
836  /// SGPRs
837  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
838 
839  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
840  /// VGPRs
841  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
842 
843  /// \returns true if the flat_scratch register should be initialized with the
844  /// pointer to the wave's scratch memory rather than a size and offset.
845  bool flatScratchIsPointer() const {
846  return getGeneration() >= AMDGPUSubtarget::GFX9;
847  }
848 
849  /// \returns true if the machine has merged shaders in which s0-s7 are
850  /// reserved by the hardware and user SGPRs start at s8
851  bool hasMergedShaders() const {
852  return getGeneration() >= GFX9;
853  }
854 
855  /// \returns SGPR allocation granularity supported by the subtarget.
856  unsigned getSGPRAllocGranule() const {
858  }
859 
860  /// \returns SGPR encoding granularity supported by the subtarget.
861  unsigned getSGPREncodingGranule() const {
863  }
864 
865  /// \returns Total number of SGPRs supported by the subtarget.
866  unsigned getTotalNumSGPRs() const {
868  }
869 
870  /// \returns Addressable number of SGPRs supported by the subtarget.
871  unsigned getAddressableNumSGPRs() const {
873  }
874 
875  /// \returns Minimum number of SGPRs that meets the given number of waves per
876  /// execution unit requirement supported by the subtarget.
877  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
878  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
879  }
880 
881  /// \returns Maximum number of SGPRs that meets the given number of waves per
882  /// execution unit requirement supported by the subtarget.
883  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
884  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
885  }
886 
887  /// \returns Reserved number of SGPRs for given function \p MF.
888  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
889 
890  /// \returns Maximum number of SGPRs that meets number of waves per execution
891  /// unit requirement for function \p MF, or number of SGPRs explicitly
892  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
893  ///
894  /// \returns Value that meets number of waves per execution unit requirement
895  /// if explicitly requested value cannot be converted to integer, violates
896  /// subtarget's specifications, or does not meet number of waves per execution
897  /// unit requirement.
898  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
899 
900  /// \returns VGPR allocation granularity supported by the subtarget.
901  unsigned getVGPRAllocGranule() const {
903  }
904 
905  /// \returns VGPR encoding granularity supported by the subtarget.
906  unsigned getVGPREncodingGranule() const {
908  }
909 
910  /// \returns Total number of VGPRs supported by the subtarget.
911  unsigned getTotalNumVGPRs() const {
913  }
914 
915  /// \returns Addressable number of VGPRs supported by the subtarget.
916  unsigned getAddressableNumVGPRs() const {
918  }
919 
920  /// \returns Minimum number of VGPRs that meets given number of waves per
921  /// execution unit requirement supported by the subtarget.
922  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
923  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
924  }
925 
926  /// \returns Maximum number of VGPRs that meets given number of waves per
927  /// execution unit requirement supported by the subtarget.
928  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
929  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
930  }
931 
932  /// \returns Maximum number of VGPRs that meets number of waves per execution
933  /// unit requirement for function \p MF, or number of VGPRs explicitly
934  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
935  ///
936  /// \returns Value that meets number of waves per execution unit requirement
937  /// if explicitly requested value cannot be converted to integer, violates
938  /// subtarget's specifications, or does not meet number of waves per execution
939  /// unit requirement.
940  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
941 
942  void getPostRAMutations(
943  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
944  const override;
945 
946  /// \returns Maximum number of work groups per compute unit supported by the
947  /// subtarget and limited by given \p FlatWorkGroupSize.
948  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
949  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
950  }
951 
952  /// \returns Minimum flat work group size supported by the subtarget.
953  unsigned getMinFlatWorkGroupSize() const override {
955  }
956 
957  /// \returns Maximum flat work group size supported by the subtarget.
958  unsigned getMaxFlatWorkGroupSize() const override {
960  }
961 
962  /// \returns Maximum number of waves per execution unit supported by the
963  /// subtarget and limited by given \p FlatWorkGroupSize.
964  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
965  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
966  }
967 
968  /// \returns Minimum number of waves per execution unit supported by the
969  /// subtarget.
970  unsigned getMinWavesPerEU() const override {
972  }
973 };
974 
975 class R600Subtarget final : public R600GenSubtargetInfo,
976  public AMDGPUSubtarget {
977 private:
978  R600InstrInfo InstrInfo;
979  R600FrameLowering FrameLowering;
980  bool FMA;
981  bool CaymanISA;
982  bool CFALUBug;
983  bool DX10Clamp;
984  bool HasVertexCache;
985  bool R600ALUInst;
986  bool FP64;
987  short TexVTXClauseSize;
988  Generation Gen;
989  R600TargetLowering TLInfo;
990  InstrItineraryData InstrItins;
991  SelectionDAGTargetInfo TSInfo;
992 
993 public:
994  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
995  const TargetMachine &TM);
996 
997  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
998 
999  const R600FrameLowering *getFrameLowering() const override {
1000  return &FrameLowering;
1001  }
1002 
1003  const R600TargetLowering *getTargetLowering() const override {
1004  return &TLInfo;
1005  }
1006 
1007  const R600RegisterInfo *getRegisterInfo() const override {
1008  return &InstrInfo.getRegisterInfo();
1009  }
1010 
1011  const InstrItineraryData *getInstrItineraryData() const override {
1012  return &InstrItins;
1013  }
1014 
1015  // Nothing implemented, just prevent crashes on use.
1017  return &TSInfo;
1018  }
1019 
1020  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1021 
1023  return Gen;
1024  }
1025 
1026  unsigned getStackAlignment() const {
1027  return 4;
1028  }
1029 
1030  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1031  StringRef GPU, StringRef FS);
1032 
1033  bool hasBFE() const {
1034  return (getGeneration() >= EVERGREEN);
1035  }
1036 
1037  bool hasBFI() const {
1038  return (getGeneration() >= EVERGREEN);
1039  }
1040 
1041  bool hasBCNT(unsigned Size) const {
1042  if (Size == 32)
1043  return (getGeneration() >= EVERGREEN); // Size 32 is reported from EVERGREEN onwards.
1044 
1045  return false; // Any other Size is reported as unsupported.
1046  }
1047 
1048  bool hasBORROW() const {
1049  return (getGeneration() >= EVERGREEN);
1050  }
1051 
1052  bool hasCARRY() const {
1053  return (getGeneration() >= EVERGREEN);
1054  }
1055 
1056  bool hasCaymanISA() const {
1057  return CaymanISA;
1058  }
1059 
1060  bool hasFFBL() const {
1061  return (getGeneration() >= EVERGREEN);
1062  }
1063 
1064  bool hasFFBH() const {
1065  return (getGeneration() >= EVERGREEN);
1066  }
1067 
1068  bool hasFMA() const { return FMA; }
1069 
1070  bool hasCFAluBug() const { return CFALUBug; }
1071 
1072  bool hasVertexCache() const { return HasVertexCache; }
1073 
1074  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1075 
1076  bool enableMachineScheduler() const override {
1077  return true;
1078  }
1079 
1080  bool enableSubRegLiveness() const override {
1081  return true;
1082  }
1083 
1084  /// \returns Maximum number of work groups per compute unit supported by the
1085  /// subtarget and limited by given \p FlatWorkGroupSize.
1086  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1087  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1088  }
1089 
1090  /// \returns Minimum flat work group size supported by the subtarget.
1091  unsigned getMinFlatWorkGroupSize() const override {
1093  }
1094 
1095  /// \returns Maximum flat work group size supported by the subtarget.
1096  unsigned getMaxFlatWorkGroupSize() const override {
1098  }
1099 
1100  /// \returns Maximum number of waves per execution unit supported by the
1101  /// subtarget and limited by given \p FlatWorkGroupSize.
1102  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1103  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1104  }
1105 
1106  /// \returns Minimum number of waves per execution unit supported by the
1107  /// subtarget.
1108  unsigned getMinWavesPerEU() const override {
1109  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1110  }
1111 };
1112 
1113 } // end namespace llvm
1114 
1115 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool enableIEEEBit(const MachineFunction &MF) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasSDWAOmod() const
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
bool hasApertureRegs() const
bool debuggerSupported() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool isMesaKernel(const Function &F) const
unsigned getMinFlatWorkGroupSize() const override
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:298
bool isPromoteAllocaEnabled() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getSGPRAllocGranule() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasTrigReducedRange() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
virtual unsigned getMinWavesPerEU() const =0
F(f)
InstrItineraryData InstrItins
unsigned getMaxWavesPerEU() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasFastFMAF32() const
Generation getGeneration() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:164
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasFP64Denormals() const
Holds all the information related to register banks.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
bool hasDwordx3LoadStores() const
bool hasIntClamp() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool enableDX10Clamp() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool debuggerInsertNops() const
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
bool isTrapHandlerEnabled() const
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasSMemRealTime() const
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasHalfRate64Ops() const
bool useFlatForGlobal() const
unsigned getAddressableNumSGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinWavesPerEU() const override
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
Itinerary data supplied by a subtarget to be used by a target.
bool hasAddNoCarry() const
const CallLowering * getCallLowering() const override
virtual unsigned getMinFlatWorkGroupSize() const =0
bool dumpCode() const
bool debuggerEmitPrologue() const
bool isSRAMECCEnabled() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
bool hasDot2Insts() const
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
bool isCompute(CallingConv::ID cc)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMaxWavesPerCU() const
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
bool hasCFAluBug() const
unsigned getStackAlignment() const
bool hasFminFmaxLegacy() const
bool hasDLInsts() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasSwap() const
bool hasMovrel() const
unsigned MaxPrivateElementSize
bool usePRTStrictNull() const
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
bool hasLDSFPAtomics() const
const R600InstrInfo * getInstrInfo() const override
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:71
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasScalarAtomics() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI)
bool enableHugePrivateBuffer() const
bool enableSIScheduler() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:212
bool hasMadMixInsts() const
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
unsigned getAlignmentForImplicitArgPtr() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasMed3_16() const
int getLDSBankCount() const
bool hasBCNT(unsigned Size) const
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI)
const InstructionSelector * getInstructionSelector() const override
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool enableSubRegLiveness() const override
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
R600 DAG Lowering interface definition.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(const Triple &TT)
unsigned getTotalNumVGPRs() const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:46
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getStackAlignment() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool hasSDWAScalar() const
const InstrItineraryData * getInstrItineraryData() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
bool hasDot1Insts() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:301
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:58
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasR128A16() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override