LLVM  10.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "AMDGPU.h"
18 #include "AMDGPUCallLowering.h"
19 #include "R600FrameLowering.h"
20 #include "R600ISelLowering.h"
21 #include "R600InstrInfo.h"
22 #include "SIFrameLowering.h"
23 #include "SIISelLowering.h"
24 #include "SIInstrInfo.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/ADT/Triple.h"
34 #include <cassert>
35 #include <cstdint>
36 #include <memory>
37 #include <utility>
38 
39 #define GET_SUBTARGETINFO_HEADER
40 #include "AMDGPUGenSubtargetInfo.inc"
41 #define GET_SUBTARGETINFO_HEADER
42 #include "R600GenSubtargetInfo.inc"
43 
44 namespace llvm {
45 
46 class StringRef;
47 
49 public:
50  enum Generation {
51  R600 = 0,
52  R700 = 1,
53  EVERGREEN = 2,
58  GFX9 = 7,
59  GFX10 = 8
60  };
61 
62 private:
63  Triple TargetTriple;
64 
65 protected:
70  bool HasSDWA;
72  bool HasMulI24;
73  bool HasMulU24;
78  unsigned MaxWavesPerEU;
80  unsigned WavefrontSize;
81 
82 public:
83  AMDGPUSubtarget(const Triple &TT);
84 
85  static const AMDGPUSubtarget &get(const MachineFunction &MF);
86  static const AMDGPUSubtarget &get(const TargetMachine &TM,
87  const Function &F);
88 
89  /// \returns Default range flat work group size for a calling convention.
90  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
91 
92  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
93  /// for function \p F, or minimum/maximum flat work group sizes explicitly
94  /// requested using "amdgpu-flat-work-group-size" attribute attached to
95  /// function \p F.
96  ///
97  /// \returns Subtarget's default values if explicitly requested values cannot
98  /// be converted to integer, or violate subtarget's specifications.
99  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
100 
101  /// \returns Subtarget's default pair of minimum/maximum number of waves per
102  /// execution unit for function \p F, or minimum/maximum number of waves per
103  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
104  /// attached to function \p F.
105  ///
106  /// \returns Subtarget's default values if explicitly requested values cannot
107  /// be converted to integer, violate subtarget's specifications, or are not
108  /// compatible with minimum/maximum number of waves limited by flat work group
109  /// size, register usage, and/or lds usage.
110  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
111 
112  /// Return the amount of LDS that can be used that will not restrict the
113  /// occupancy lower than WaveCount.
114  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
115  const Function &) const;
116 
117  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
118  /// the given LDS memory size (\p Bytes) is the only constraint.
119  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
120 
121  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
122 
123  bool isAmdHsaOS() const {
124  return TargetTriple.getOS() == Triple::AMDHSA;
125  }
126 
127  bool isAmdPalOS() const {
128  return TargetTriple.getOS() == Triple::AMDPAL;
129  }
130 
131  bool isMesa3DOS() const {
132  return TargetTriple.getOS() == Triple::Mesa3D;
133  }
134 
135  bool isMesaKernel(const Function &F) const {
136  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
137  }
138 
139  bool isAmdHsaOrMesa(const Function &F) const {
140  return isAmdHsaOS() || isMesaKernel(F);
141  }
142 
143  bool has16BitInsts() const {
144  return Has16BitInsts;
145  }
146 
147  bool hasMadMixInsts() const {
148  return HasMadMixInsts;
149  }
150 
151  bool hasFP32Denormals() const {
152  return FP32Denormals;
153  }
154 
155  bool hasFPExceptions() const {
156  return FPExceptions;
157  }
158 
159  bool hasSDWA() const {
160  return HasSDWA;
161  }
162 
163  bool hasVOP3PInsts() const {
164  return HasVOP3PInsts;
165  }
166 
167  bool hasMulI24() const {
168  return HasMulI24;
169  }
170 
171  bool hasMulU24() const {
172  return HasMulU24;
173  }
174 
175  bool hasInv2PiInlineImm() const {
176  return HasInv2PiInlineImm;
177  }
178 
179  bool hasFminFmaxLegacy() const {
180  return HasFminFmaxLegacy;
181  }
182 
183  bool hasTrigReducedRange() const {
184  return HasTrigReducedRange;
185  }
186 
187  bool isPromoteAllocaEnabled() const {
188  return EnablePromoteAlloca;
189  }
190 
191  unsigned getWavefrontSize() const {
192  return WavefrontSize;
193  }
194 
195  int getLocalMemorySize() const {
196  return LocalMemorySize;
197  }
198 
199  unsigned getAlignmentForImplicitArgPtr() const {
200  return isAmdHsaOS() ? 8 : 4; // 8 when the target OS is AMDHSA, 4 otherwise.
201  }
202 
203  /// \returns the offset in bytes, from the start of the input buffer,
204  /// of the first explicit kernel argument of \p F.
205  unsigned getExplicitKernelArgOffset(const Function &F) const {
206  return isAmdHsaOrMesa(F) ? 0 : 36; // 0 when isAmdHsaOrMesa(F) holds, 36 otherwise.
207  }
208 
209  /// \returns Maximum number of work groups per compute unit supported by the
210  /// subtarget and limited by given \p FlatWorkGroupSize.
211  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
212 
213  /// \returns Minimum flat work group size supported by the subtarget.
214  virtual unsigned getMinFlatWorkGroupSize() const = 0;
215 
216  /// \returns Maximum flat work group size supported by the subtarget.
217  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
218 
219  /// \returns Maximum number of waves per execution unit supported by the
220  /// subtarget and limited by given \p FlatWorkGroupSize.
221  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
222 
223  /// \returns Minimum number of waves per execution unit supported by the
224  /// subtarget.
225  virtual unsigned getMinWavesPerEU() const = 0;
226 
227  /// \returns Maximum number of waves per execution unit supported by the
228  /// subtarget without any kind of limitation.
229  unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
230 
231  /// Creates value range metadata on a workitemid.* intrinsic call or load.
232  bool makeLIDRangeMetadata(Instruction *I) const;
233 
234  /// \returns the byte count of the implicit arguments handed to the shader or
235  /// kernel \p F on top of its explicitly declared arguments.
236  unsigned getImplicitArgNumBytes(const Function &F) const {
237  // Mesa kernels always report 16; any other function is sized from its
238  // "amdgpu-implicitarg-num-bytes" attribute (0 is passed as the last argument).
239  return isMesaKernel(F) ? 16 : AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
240  }
241  uint64_t getExplicitKernArgSize(const Function &F,
242  unsigned &MaxAlign) const;
243  unsigned getKernArgSegmentSize(const Function &F,
244  unsigned &MaxAlign) const;
245 
246  virtual ~AMDGPUSubtarget() = default; // Defaulted virtual destructor: this class declares pure virtual members and is used as a base.
247 };
248 
250  public AMDGPUSubtarget {
251 
253 
254 public:
256  TrapHandlerAbiNone = 0,
257  TrapHandlerAbiHsa = 1
258  };
259 
260  enum TrapID {
261  TrapIDHardwareReserved = 0,
262  TrapIDHSADebugTrap = 1,
263  TrapIDLLVMTrap = 2,
264  TrapIDLLVMDebugTrap = 3,
265  TrapIDDebugBreakpoint = 7,
266  TrapIDDebugReserved8 = 8,
267  TrapIDDebugReservedFE = 0xfe,
268  TrapIDDebugReservedFF = 0xff
269  };
270 
272  LLVMTrapHandlerRegValue = 1
273  };
274 
275 private:
276  /// GlobalISel related APIs.
277  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
278  std::unique_ptr<InstructionSelector> InstSelector;
279  std::unique_ptr<LegalizerInfo> Legalizer;
280  std::unique_ptr<RegisterBankInfo> RegBankInfo;
281 
282 protected:
283  // Basic subtarget description.
285  unsigned Gen;
289 
290  // Possibly statically set by tablegen, but may want to be overridden.
293 
294  // Dynamically set bits that enable features.
306 
307  // Used as options.
313  bool DumpCode;
314 
315  // Subtarget properties statically set by tablegen
316  bool FP64;
317  bool FMA;
318  bool MIMG_R128;
319  bool IsGCN;
321  bool CIInsts;
322  bool GFX8Insts;
323  bool GFX9Insts;
330  bool HasMovrel;
339  bool HasDPP;
340  bool HasDPP8;
356  bool HasVscnt;
368  bool CaymanISA;
369  bool CFALUBug;
374 
384 
385  // Dummy feature to use for assembler in tablegen.
387 
389 private:
390  SIInstrInfo InstrInfo;
391  SITargetLowering TLInfo;
392  SIFrameLowering FrameLowering;
393 
394  // Upper bound on per-wave scratch size. See COMPUTE_TMPRING_SIZE.WAVESIZE, a 13-bit field in units of 256 dwords.
395  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); // (256 dwords * 4) * largest 13-bit value.
396 
397 public:
398  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
399  const GCNTargetMachine &TM);
400  ~GCNSubtarget() override;
401 
402  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
403  StringRef GPU, StringRef FS);
404 
405  const SIInstrInfo *getInstrInfo() const override {
406  return &InstrInfo;
407  }
408 
409  const SIFrameLowering *getFrameLowering() const override {
410  return &FrameLowering;
411  }
412 
413  const SITargetLowering *getTargetLowering() const override {
414  return &TLInfo;
415  }
416 
417  const SIRegisterInfo *getRegisterInfo() const override {
418  return &InstrInfo.getRegisterInfo();
419  }
420 
421  const CallLowering *getCallLowering() const override {
422  return CallLoweringInfo.get();
423  }
424 
426  return InstSelector.get();
427  }
428 
429  const LegalizerInfo *getLegalizerInfo() const override {
430  return Legalizer.get();
431  }
432 
433  const RegisterBankInfo *getRegBankInfo() const override {
434  return RegBankInfo.get();
435  }
436 
437  // Nothing implemented, just prevent crashes on use.
438  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
439  return &TSInfo;
440  }
441 
442  const InstrItineraryData *getInstrItineraryData() const override {
443  return &InstrItins;
444  }
445 
446  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
447 
449  return (Generation)Gen;
450  }
451 
452  unsigned getWavefrontSizeLog2() const {
453  return Log2_32(WavefrontSize);
454  }
455 
456  /// Return the number of high bits known to be zero for a frame index.
458  return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
459  }
460 
461  int getLDSBankCount() const {
462  return LDSBankCount;
463  }
464 
465  unsigned getMaxPrivateElementSize() const {
466  return MaxPrivateElementSize;
467  }
468 
469  unsigned getConstantBusLimit(unsigned Opcode) const;
470 
471  bool hasIntClamp() const {
472  return HasIntClamp;
473  }
474 
475  bool hasFP64() const {
476  return FP64;
477  }
478 
479  bool hasMIMG_R128() const {
480  return MIMG_R128;
481  }
482 
483  bool hasHWFP64() const {
484  return FP64;
485  }
486 
487  bool hasFastFMAF32() const {
488  return FastFMAF32;
489  }
490 
491  bool hasHalfRate64Ops() const {
492  return HalfRate64Ops;
493  }
494 
495  bool hasAddr64() const {
496  return getGeneration() < VOLCANIC_ISLANDS; // Reported only for generations before VOLCANIC_ISLANDS.
497  }
498 
499  // Return true if the target only has the reverse operand versions of VALU
500  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
501  bool hasOnlyRevVALUShifts() const {
502  return getGeneration() >= VOLCANIC_ISLANDS;
503  }
504 
505  bool hasBFE() const {
506  return true;
507  }
508 
509  bool hasBFI() const {
510  return true;
511  }
512 
513  bool hasBFM() const {
514  return hasBFE();
515  }
516 
517  bool hasBCNT(unsigned Size) const {
518  return true;
519  }
520 
521  bool hasFFBL() const {
522  return true;
523  }
524 
525  bool hasFFBH() const {
526  return true;
527  }
528 
529  bool hasMed3_16() const {
530  return getGeneration() >= AMDGPUSubtarget::GFX9;
531  }
532 
533  bool hasMin3Max3_16() const {
534  return getGeneration() >= AMDGPUSubtarget::GFX9;
535  }
536 
537  bool hasFmaMixInsts() const {
538  return HasFmaMixInsts;
539  }
540 
541  bool hasCARRY() const {
542  return true;
543  }
544 
545  bool hasFMA() const {
546  return FMA;
547  }
548 
549  bool hasSwap() const {
550  return GFX9Insts;
551  }
552 
554  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
555  }
556 
557  /// True if the offset field of DS instructions works as expected. On SI, the
558  /// offset uses a 16-bit adder and does not always wrap properly.
559  bool hasUsableDSOffset() const {
560  return getGeneration() >= SEA_ISLANDS;
561  }
562 
564  return EnableUnsafeDSOffsetFolding;
565  }
566 
567  /// Condition output from div_scale is usable.
569  return getGeneration() != SOUTHERN_ISLANDS;
570  }
571 
572  /// Extra wait hazard is needed in some cases before
573  /// s_cbranch_vccnz/s_cbranch_vccz.
574  bool hasReadVCCZBug() const {
575  return getGeneration() <= SEA_ISLANDS;
576  }
577 
578  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
579  /// was written by a VALU instruction.
581  return getGeneration() == SOUTHERN_ISLANDS;
582  }
583 
584  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
585  /// SGPR was written by a VALU Instruction.
587  return getGeneration() >= VOLCANIC_ISLANDS;
588  }
589 
590  bool hasRFEHazards() const {
591  return getGeneration() >= VOLCANIC_ISLANDS;
592  }
593 
594  /// \returns the hazard wait-state count for s_setreg_b32/s_setreg_imm32_b32.
595  unsigned getSetRegWaitStates() const {
596  return getGeneration() > SEA_ISLANDS ? 2 : 1; // 1 up to and including SEA_ISLANDS, 2 afterwards.
597  }
598 
599  bool dumpCode() const {
600  return DumpCode;
601  }
602 
603  /// Return the amount of LDS that can be used that will not restrict the
604  /// occupancy lower than WaveCount.
605  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
606  const Function &) const;
607 
608  bool hasFP16Denormals() const {
609  return FP64FP16Denormals;
610  }
611 
612  bool hasFP64Denormals() const {
613  return FP64FP16Denormals;
614  }
615 
617  return getGeneration() >= AMDGPUSubtarget::GFX9;
618  }
619 
620  /// \returns If target supports S_DENORM_MODE.
621  bool hasDenormModeInst() const {
622  return getGeneration() >= AMDGPUSubtarget::GFX10;
623  }
624 
625  bool useFlatForGlobal() const {
626  return FlatForGlobal;
627  }
628 
629  /// \returns If target supports ds_read/write_b128 and user enables generation
630  /// of ds_read/write_b128.
631  bool useDS128() const {
632  return CIInsts && EnableDS128;
633  }
634 
635  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
636  bool haveRoundOpsF64() const {
637  return CIInsts;
638  }
639 
640  /// \returns If MUBUF instructions always perform range checking, even for
641  /// buffer resources used for private memory access.
643  return getGeneration() < AMDGPUSubtarget::GFX9;
644  }
645 
646  /// \returns If target requires PRT Strict NULL support (zero result registers
647  /// for sparse texture support).
648  bool usePRTStrictNull() const {
649  return EnablePRTStrictNull;
650  }
651 
653  return AutoWaitcntBeforeBarrier;
654  }
655 
656  bool hasCodeObjectV3() const {
657  // FIXME: Need to add code object v3 support for mesa and pal; until then only AMDHSA can report it.
658  return isAmdHsaOS() && CodeObjectV3;
659  }
660 
662  return UnalignedBufferAccess;
663  }
664 
666  return UnalignedScratchAccess;
667  }
668 
669  bool hasApertureRegs() const {
670  return HasApertureRegs;
671  }
672 
673  bool isTrapHandlerEnabled() const {
674  return TrapHandler;
675  }
676 
677  bool isXNACKEnabled() const {
678  return EnableXNACK;
679  }
680 
681  bool isCuModeEnabled() const {
682  return EnableCuMode;
683  }
684 
685  bool hasFlatAddressSpace() const {
686  return FlatAddressSpace;
687  }
688 
689  bool hasFlatScrRegister() const {
690  return hasFlatAddressSpace();
691  }
692 
693  bool hasFlatInstOffsets() const {
694  return FlatInstOffsets;
695  }
696 
697  bool hasFlatGlobalInsts() const {
698  return FlatGlobalInsts;
699  }
700 
701  bool hasFlatScratchInsts() const {
702  return FlatScratchInsts;
703  }
704 
706  return ScalarFlatScratchInsts;
707  }
708 
709  bool hasFlatSegmentOffsetBug() const {
710  return HasFlatSegmentOffsetBug;
711  }
712 
714  return getGeneration() > GFX9;
715  }
716 
717  bool hasD16LoadStore() const {
718  return getGeneration() >= GFX9;
719  }
720 
721  bool d16PreservesUnusedBits() const {
722  return hasD16LoadStore() && !isSRAMECCEnabled();
723  }
724 
725  bool hasD16Images() const {
726  return getGeneration() >= VOLCANIC_ISLANDS;
727  }
728 
729  /// \returns true if most LDS instructions have an m0 use that requires m0 to
730  /// be initialized; this is reported for generations before GFX9.
731  bool ldsRequiresM0Init() const {
732  return getGeneration() < GFX9;
733  }
734 
735  // True if the hardware rewinds and replays GWS operations if a wave is
736  // preempted.
737  //
738  // If this is false, a GWS operation requires testing if a nack set the
739  // MEM_VIOL bit, and repeating if so.
740  bool hasGWSAutoReplay() const {
741  return getGeneration() >= GFX9;
742  }
743 
744  /// \returns if target has ds_gws_sema_release_all instruction.
745  bool hasGWSSemaReleaseAll() const {
746  return CIInsts;
747  }
748 
749  bool hasAddNoCarry() const {
750  return AddNoCarryInsts;
751  }
752 
753  bool hasUnpackedD16VMem() const {
754  return HasUnpackedD16VMem;
755  }
756 
757  // Covers VS/PS/CS graphics shaders
758  bool isMesaGfxShader(const Function &F) const {
759  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
760  }
761 
762  bool hasMad64_32() const {
763  return getGeneration() >= SEA_ISLANDS;
764  }
765 
766  bool hasSDWAOmod() const {
767  return HasSDWAOmod;
768  }
769 
770  bool hasSDWAScalar() const {
771  return HasSDWAScalar;
772  }
773 
774  bool hasSDWASdst() const {
775  return HasSDWASdst;
776  }
777 
778  bool hasSDWAMac() const {
779  return HasSDWAMac;
780  }
781 
782  bool hasSDWAOutModsVOPC() const {
783  return HasSDWAOutModsVOPC;
784  }
785 
786  bool hasDLInsts() const {
787  return HasDLInsts;
788  }
789 
790  bool hasDot1Insts() const {
791  return HasDot1Insts;
792  }
793 
794  bool hasDot2Insts() const {
795  return HasDot2Insts;
796  }
797 
798  bool hasDot3Insts() const {
799  return HasDot3Insts;
800  }
801 
802  bool hasDot4Insts() const {
803  return HasDot4Insts;
804  }
805 
806  bool hasDot5Insts() const {
807  return HasDot5Insts;
808  }
809 
810  bool hasDot6Insts() const {
811  return HasDot6Insts;
812  }
813 
814  bool hasMAIInsts() const {
815  return HasMAIInsts;
816  }
817 
818  bool hasPkFmacF16Inst() const {
819  return HasPkFmacF16Inst;
820  }
821 
822  bool hasAtomicFaddInsts() const {
823  return HasAtomicFaddInsts;
824  }
825 
826  bool isSRAMECCEnabled() const {
827  return EnableSRAMECC;
828  }
829 
830  bool hasNoSdstCMPX() const {
831  return HasNoSdstCMPX;
832  }
833 
834  bool hasVscnt() const {
835  return HasVscnt;
836  }
837 
838  bool hasRegisterBanking() const {
839  return HasRegisterBanking;
840  }
841 
842  bool hasVOP3Literal() const {
843  return HasVOP3Literal;
844  }
845 
846  bool hasNoDataDepHazard() const {
847  return HasNoDataDepHazard;
848  }
849 
851  return getGeneration() < SEA_ISLANDS;
852  }
853 
854  // Scratch is allocated in blocks of 256 dwords per wave for the entire
855  // wavefront. When viewed from the perspective of an arbitrary workitem, this
856  // is 4-byte aligned.
857  //
858  // Only 4-byte alignment is really needed to access anything. Transformations
859  // on the pointer value itself may rely on the alignment / known low bits of
860  // the pointer. Set this to something above the minimum to avoid needing
861  // dynamic realignment in common cases.
862  unsigned getStackAlignment() const {
863  return 16;
864  }
865 
866  bool enableMachineScheduler() const override {
867  return true;
868  }
869 
870  bool enableSubRegLiveness() const override {
871  return true;
872  }
873 
874  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
876 
877  /// \returns Number of execution units per compute unit supported by the
878  /// subtarget.
879  unsigned getEUsPerCU() const {
880  return AMDGPU::IsaInfo::getEUsPerCU(this);
881  }
882 
883  /// \returns Maximum number of waves per compute unit supported by the
884  /// subtarget without any kind of limitation.
885  unsigned getMaxWavesPerCU() const {
887  }
888 
889  /// \returns Maximum number of waves per compute unit supported by the
890  /// subtarget and limited by given \p FlatWorkGroupSize.
891  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
892  return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
893  }
894 
895  /// \returns Number of waves per work group supported by the subtarget and
896  /// limited by given \p FlatWorkGroupSize.
897  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
898  return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
899  }
900 
901  // static wrappers
902  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
903 
904  // XXX - Why is this here if it isn't in the default pass set?
905  bool enableEarlyIfConversion() const override {
906  return true;
907  }
908 
909  void overrideSchedPolicy(MachineSchedPolicy &Policy,
910  unsigned NumRegionInstrs) const override;
911 
912  unsigned getMaxNumUserSGPRs() const {
913  return 16;
914  }
915 
916  bool hasSMemRealTime() const {
917  return HasSMemRealTime;
918  }
919 
920  bool hasMovrel() const {
921  return HasMovrel;
922  }
923 
924  bool hasVGPRIndexMode() const {
925  return HasVGPRIndexMode;
926  }
927 
928  bool useVGPRIndexMode(bool UserEnable) const {
929  return !hasMovrel() || (UserEnable && hasVGPRIndexMode()); // Always true without movrel; otherwise needs both UserEnable and hasVGPRIndexMode().
930  }
931 
932  bool hasScalarCompareEq64() const {
933  return getGeneration() >= VOLCANIC_ISLANDS;
934  }
935 
936  bool hasScalarStores() const {
937  return HasScalarStores;
938  }
939 
940  bool hasScalarAtomics() const {
941  return HasScalarAtomics;
942  }
943 
944  bool hasLDSFPAtomics() const {
945  return GFX8Insts;
946  }
947 
948  bool hasDPP() const {
949  return HasDPP;
950  }
951 
952  bool hasDPP8() const {
953  return HasDPP8;
954  }
955 
956  bool hasR128A16() const {
957  return HasR128A16;
958  }
959 
960  bool hasOffset3fBug() const {
961  return HasOffset3fBug;
962  }
963 
964  bool hasNSAEncoding() const {
965  return HasNSAEncoding;
966  }
967 
968  bool hasMadF16() const;
969 
970  bool enableSIScheduler() const {
971  return EnableSIScheduler;
972  }
973 
974  bool loadStoreOptEnabled() const {
975  return EnableLoadStoreOpt;
976  }
977 
978  bool hasSGPRInitBug() const {
979  return SGPRInitBug;
980  }
981 
982  bool has12DWordStoreHazard() const {
983  return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
984  }
985 
986  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
987  bool hasDwordx3LoadStores() const {
988  return CIInsts;
989  }
990 
991  bool hasSMovFedHazard() const {
992  return getGeneration() == AMDGPUSubtarget::GFX9;
993  }
994 
996  return getGeneration() == AMDGPUSubtarget::GFX9;
997  }
998 
999  bool hasReadM0SendMsgHazard() const {
1000  const auto CurGen = getGeneration(); // Query once; the hazard applies to VOLCANIC_ISLANDS through GFX9 inclusive.
1001  return CurGen >= AMDGPUSubtarget::VOLCANIC_ISLANDS && CurGen <= AMDGPUSubtarget::GFX9;
1002  }
1003 
1004  bool hasVcmpxPermlaneHazard() const {
1005  return HasVcmpxPermlaneHazard;
1006  }
1007 
1009  return HasVMEMtoScalarWriteHazard;
1010  }
1011 
1013  return HasSMEMtoVectorWriteHazard;
1014  }
1015 
1016  bool hasLDSMisalignedBug() const {
1017  return LDSMisalignedBug && !EnableCuMode;
1018  }
1019 
1020  bool hasInstFwdPrefetchBug() const {
1021  return HasInstFwdPrefetchBug;
1022  }
1023 
1024  bool hasVcmpxExecWARHazard() const {
1025  return HasVcmpxExecWARHazard;
1026  }
1027 
1029  return HasLdsBranchVmemWARHazard;
1030  }
1031 
1032  bool hasNSAtoVMEMBug() const {
1033  return HasNSAtoVMEMBug;
1034  }
1035 
1036  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1037  /// SGPRs
1038  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1039 
1040  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1041  /// VGPRs
1042  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1043 
1044  /// Return occupancy for the given function. Accounts for the used LDS and
1045  /// the numbers of registers if they are provided.
1046  /// Note, occupancy can be affected by the scratch allocation as well, but
1047  /// we do not have enough information to compute it.
1048  unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0,
1049  unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1050 
1051  /// \returns true if the flat_scratch register should be initialized with the
1052  /// pointer to the wave's scratch memory rather than a size and offset.
1053  bool flatScratchIsPointer() const {
1054  return getGeneration() >= AMDGPUSubtarget::GFX9;
1055  }
1056 
1057  /// \returns true if the machine has merged shaders in which s0-s7 are
1058  /// reserved by the hardware and user SGPRs start at s8
1059  bool hasMergedShaders() const {
1060  return getGeneration() >= GFX9;
1061  }
1062 
1063  /// \returns SGPR allocation granularity supported by the subtarget.
1064  unsigned getSGPRAllocGranule() const {
1066  }
1067 
1068  /// \returns SGPR encoding granularity supported by the subtarget.
1069  unsigned getSGPREncodingGranule() const {
1071  }
1072 
1073  /// \returns Total number of SGPRs supported by the subtarget.
1074  unsigned getTotalNumSGPRs() const {
1075  return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1076  }
1077 
1078  /// \returns Addressable number of SGPRs supported by the subtarget.
1079  unsigned getAddressableNumSGPRs() const {
1081  }
1082 
1083  /// \returns Minimum number of SGPRs that meets the given number of waves per
1084  /// execution unit requirement supported by the subtarget.
1085  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1086  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1087  }
1088 
1089  /// \returns Maximum number of SGPRs that meets the given number of waves per
1090  /// execution unit requirement supported by the subtarget.
1091  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1092  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1093  }
1094 
1095  /// \returns Reserved number of SGPRs for given function \p MF.
1096  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1097 
1098  /// \returns Maximum number of SGPRs that meets number of waves per execution
1099  /// unit requirement for function \p MF, or number of SGPRs explicitly
1100  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1101  ///
1102  /// \returns Value that meets number of waves per execution unit requirement
1103  /// if explicitly requested value cannot be converted to integer, violates
1104  /// subtarget's specifications, or does not meet number of waves per execution
1105  /// unit requirement.
1106  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1107 
1108  /// \returns VGPR allocation granularity supported by the subtarget.
1109  unsigned getVGPRAllocGranule() const {
1111  }
1112 
1113  /// \returns VGPR encoding granularity supported by the subtarget.
1114  unsigned getVGPREncodingGranule() const {
1116  }
1117 
1118  /// \returns Total number of VGPRs supported by the subtarget.
1119  unsigned getTotalNumVGPRs() const {
1120  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1121  }
1122 
1123  /// \returns Addressable number of VGPRs supported by the subtarget.
1124  unsigned getAddressableNumVGPRs() const {
1126  }
1127 
1128  /// \returns Minimum number of VGPRs that meets given number of waves per
1129  /// execution unit requirement supported by the subtarget.
1130  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1131  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1132  }
1133 
1134  /// \returns Maximum number of VGPRs that meets given number of waves per
1135  /// execution unit requirement supported by the subtarget.
1136  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1137  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1138  }
1139 
1140  /// \returns Maximum number of VGPRs that meets number of waves per execution
1141  /// unit requirement for function \p MF, or number of VGPRs explicitly
1142  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1143  ///
1144  /// \returns Value that meets number of waves per execution unit requirement
1145  /// if explicitly requested value cannot be converted to integer, violates
1146  /// subtarget's specifications, or does not meet number of waves per execution
1147  /// unit requirement.
1148  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1149 
1150  void getPostRAMutations(
1151  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1152  const override;
1153 
1154  bool isWave32() const {
1155  return WavefrontSize == 32;
1156  }
1157 
1159  return getRegisterInfo()->getBoolRC();
1160  }
1161 
1162  /// \returns Maximum number of work groups per compute unit supported by the
1163  /// subtarget and limited by given \p FlatWorkGroupSize.
1164  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1165  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1166  }
1167 
1168  /// \returns Minimum flat work group size supported by the subtarget.
1169  unsigned getMinFlatWorkGroupSize() const override {
1171  }
1172 
1173  /// \returns Maximum flat work group size supported by the subtarget.
1174  unsigned getMaxFlatWorkGroupSize() const override {
1176  }
1177 
1178  /// \returns Maximum number of waves per execution unit supported by the
1179  /// subtarget and limited by given \p FlatWorkGroupSize.
1180  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1181  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1182  }
1183 
1184  /// \returns Minimum number of waves per execution unit supported by the
1185  /// subtarget.
1186  unsigned getMinWavesPerEU() const override {
1187  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1188  }
1189 };
1190 
1192  public AMDGPUSubtarget {
1193 private:
1194  R600InstrInfo InstrInfo;
1195  R600FrameLowering FrameLowering;
1196  bool FMA;
1197  bool CaymanISA;
1198  bool CFALUBug;
1199  bool HasVertexCache;
1200  bool R600ALUInst;
1201  bool FP64;
1202  short TexVTXClauseSize;
1203  Generation Gen;
1204  R600TargetLowering TLInfo;
1205  InstrItineraryData InstrItins;
1206  SelectionDAGTargetInfo TSInfo;
1207 
1208 public:
1209  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
1210  const TargetMachine &TM);
1211 
1212  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
1213 
1214  const R600FrameLowering *getFrameLowering() const override {
1215  return &FrameLowering;
1216  }
1217 
1218  const R600TargetLowering *getTargetLowering() const override {
1219  return &TLInfo;
1220  }
1221 
1222  const R600RegisterInfo *getRegisterInfo() const override {
1223  return &InstrInfo.getRegisterInfo();
1224  }
1225 
1226  const InstrItineraryData *getInstrItineraryData() const override {
1227  return &InstrItins;
1228  }
1229 
1230  // Nothing implemented, just prevent crashes on use.
1232  return &TSInfo;
1233  }
1234 
1235  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1236 
1238  return Gen;
1239  }
1240 
1241  unsigned getStackAlignment() const {
1242  return 4;
1243  }
1244 
1245  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1246  StringRef GPU, StringRef FS);
1247 
1248  bool hasBFE() const {
1249  return (getGeneration() >= EVERGREEN);
1250  }
1251 
1252  bool hasBFI() const {
1253  return (getGeneration() >= EVERGREEN);
1254  }
1255 
1256  bool hasBCNT(unsigned Size) const {
1257  // Only reported for a 32-bit Size, and only from EVERGREEN onwards.
1258  return Size == 32 && getGeneration() >= EVERGREEN;
1259  }
1262 
1263  bool hasBORROW() const {
1264  return (getGeneration() >= EVERGREEN);
1265  }
1266 
1267  bool hasCARRY() const {
1268  return (getGeneration() >= EVERGREEN);
1269  }
1270 
1271  bool hasCaymanISA() const {
1272  return CaymanISA;
1273  }
1274 
1275  bool hasFFBL() const {
1276  return (getGeneration() >= EVERGREEN);
1277  }
1278 
1279  bool hasFFBH() const {
1280  return (getGeneration() >= EVERGREEN);
1281  }
1282 
1283  bool hasFMA() const { return FMA; }
1284 
1285  bool hasCFAluBug() const { return CFALUBug; }
1286 
1287  bool hasVertexCache() const { return HasVertexCache; }
1288 
1289  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1290 
1291  bool enableMachineScheduler() const override {
1292  return true;
1293  }
1294 
1295  bool enableSubRegLiveness() const override {
1296  return true;
1297  }
1298 
1299  /// \returns Maximum number of work groups per compute unit supported by the
1300  /// subtarget and limited by given \p FlatWorkGroupSize.
1301  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1302  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1303  }
1304 
1305  /// \returns Minimum flat work group size supported by the subtarget.
1306  unsigned getMinFlatWorkGroupSize() const override {
1308  }
1309 
1310  /// \returns Maximum flat work group size supported by the subtarget.
1311  unsigned getMaxFlatWorkGroupSize() const override {
1313  }
1314 
1315  /// \returns Maximum number of waves per execution unit supported by the
1316  /// subtarget and limited by given \p FlatWorkGroupSize.
1317  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1318  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1319  }
1320 
1321  /// \returns Minimum number of waves per execution unit supported by the
1322  /// subtarget.
1323  unsigned getMinWavesPerEU() const override {
1324  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1325  }
1326 };
1327 
1328 } // end namespace llvm
1329 
1330 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasVscnt() const
bool hasSDWAOmod() const
bool hasLDSMisalignedBug() const
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool hasVOP3Literal() const
bool hasPkFmacF16Inst() const
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
bool hasApertureRegs() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
bool hasFlatScrRegister() const
bool isMesaKernel(const Function &F) const
unsigned getMinFlatWorkGroupSize() const override
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:305
bool isPromoteAllocaEnabled() const
bool d16PreservesUnusedBits() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getSGPRAllocGranule() const
bool hasNSAtoVMEMBug() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasAtomicFaddInsts() const
bool hasTrigReducedRange() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool hasVcmpxPermlaneHazard() const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
virtual unsigned getMinWavesPerEU() const =0
F(f)
InstrItineraryData InstrItins
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasInstFwdPrefetchBug() const
bool hasFastFMAF32() const
Generation getGeneration() const
bool hasFlatSegmentOffsetBug() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:171
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasFP64Denormals() const
Holds all the information related to register banks.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
bool hasDwordx3LoadStores() const
bool hasIntClamp() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasLdsBranchVmemWARHazard() const
bool hasD16Images() const
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
bool isTrapHandlerEnabled() const
bool hasDot4Insts() const
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasSMemRealTime() const
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
const TargetRegisterClass * getBoolRC() const
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasHalfRate64Ops() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
bool useFlatForGlobal() const
unsigned getAddressableNumSGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinWavesPerEU() const override
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
Itinerary data supplied by a subtarget to be used by a target.
bool hasVMEMtoScalarWriteHazard() const
bool hasAddNoCarry() const
bool hasDot3Insts() const
const CallLowering * getCallLowering() const override
virtual unsigned getMinFlatWorkGroupSize() const =0
bool hasNoDataDepHazard() const
bool dumpCode() const
bool hasDot6Insts() const
bool isSRAMECCEnabled() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
bool hasDot2Insts() const
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMaxWavesPerCU() const
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasCFAluBug() const
unsigned getStackAlignment() const
bool hasFminFmaxLegacy() const
bool hasDLInsts() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasNSAEncoding() const
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasSwap() const
bool hasMovrel() const
unsigned MaxPrivateElementSize
bool usePRTStrictNull() const
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
bool isCuModeEnabled() const
bool hasLDSFPAtomics() const
bool hasSMEMtoVectorWriteHazard() const
const R600InstrInfo * getInstrInfo() const override
bool hasRegisterBanking() const
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:71
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
bool hasGWSAutoReplay() const
bool hasOnlyRevVALUShifts() const
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:188
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
bool hasScalarAtomics() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasOffset3fBug() const
bool hasVcmpxExecWARHazard() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
bool hasGWSSemaReleaseAll() const
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDenormModeInst() const
bool enableSIScheduler() const
bool hasRFEHazards() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
bool hasMadMixInsts() const
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
bool hasScalarFlatScratchInsts() const
unsigned getAlignmentForImplicitArgPtr() const
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasMed3_16() const
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
int getLDSBankCount() const
bool hasBCNT(unsigned Size) const
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool hasDot5Insts() const
bool enableSubRegLiveness() const override
bool hasNoSdstCMPX() const
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
R600 DAG Lowering interface definition.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(const Triple &TT)
unsigned getTotalNumVGPRs() const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:46
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getStackAlignment() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool hasSDWAScalar() const
const InstrItineraryData * getInstrItineraryData() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return whether most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
bool hasDot1Insts() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:326
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasMAIInsts() const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool hasDPP8() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
InstructionSelector * getInstructionSelector() const override
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasR128A16() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override