LLVM  10.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "AMDGPU.h"
18 #include "AMDGPUCallLowering.h"
19 #include "R600FrameLowering.h"
20 #include "R600ISelLowering.h"
21 #include "R600InstrInfo.h"
22 #include "SIFrameLowering.h"
23 #include "SIISelLowering.h"
24 #include "SIInstrInfo.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/ADT/Triple.h"
34 #include <cassert>
35 #include <cstdint>
36 #include <memory>
37 #include <utility>
38 
39 #define GET_SUBTARGETINFO_HEADER
40 #include "AMDGPUGenSubtargetInfo.inc"
41 #define GET_SUBTARGETINFO_HEADER
42 #include "R600GenSubtargetInfo.inc"
43 
44 namespace llvm {
45 
46 class StringRef;
47 
49 public:
/// Hardware generations. The numeric values ascend, and the accessors in this
/// file compare generations with relational operators.
/// NOTE(review): listing lines 54-57 are absent from this view, so the
/// enumerators for values 3-6 are not shown. SOUTHERN_ISLANDS, SEA_ISLANDS and
/// VOLCANIC_ISLANDS are used elsewhere in this file and presumably belong in
/// that gap -- confirm against the original header.
50  enum Generation {
51  R600 = 0,
52  R700 = 1,
53  EVERGREEN = 2,
58  GFX9 = 7,
59  GFX10 = 8
60  };
61 
62 private:
63  Triple TargetTriple;
64 
65 protected:
70  bool HasSDWA;
72  bool HasMulI24;
73  bool HasMulU24;
78  unsigned MaxWavesPerEU;
80  unsigned WavefrontSize;
81 
82 public:
83  AMDGPUSubtarget(const Triple &TT);
84 
85  static const AMDGPUSubtarget &get(const MachineFunction &MF);
86  static const AMDGPUSubtarget &get(const TargetMachine &TM,
87  const Function &F);
88 
89  /// \returns Default flat work group size range for a calling convention.
90  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
91 
92  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
93  /// for function \p F, or minimum/maximum flat work group sizes explicitly
94  /// requested using "amdgpu-flat-work-group-size" attribute attached to
95  /// function \p F.
96  ///
97  /// \returns Subtarget's default values if explicitly requested values cannot
98  /// be converted to integer, or violate subtarget's specifications.
99  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
100 
101  /// \returns Subtarget's default pair of minimum/maximum number of waves per
102  /// execution unit for function \p F, or minimum/maximum number of waves per
103  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
104  /// attached to function \p F.
105  ///
106  /// \returns Subtarget's default values if explicitly requested values cannot
107  /// be converted to integer, violate subtarget's specifications, or are not
108  /// compatible with minimum/maximum number of waves limited by flat work group
109  /// size, register usage, and/or lds usage.
110  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
111 
112  /// Return the amount of LDS that can be used that will not restrict the
113  /// occupancy lower than WaveCount.
114  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
115  const Function &) const;
116 
117  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
118  /// the given LDS memory size is the only constraint.
119  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
120 
121  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
122 
/// \returns true if the OS component of the target triple is AMDHSA.
123  bool isAmdHsaOS() const {
     const auto OS = TargetTriple.getOS();
124  return OS == Triple::AMDHSA;
125  }
126 
/// \returns true if the OS component of the target triple is AMDPAL.
127  bool isAmdPalOS() const {
     const auto OS = TargetTriple.getOS();
128  return OS == Triple::AMDPAL;
129  }
130 
/// \returns true if the OS component of the target triple is Mesa3D.
131  bool isMesa3DOS() const {
     const auto OS = TargetTriple.getOS();
132  return OS == Triple::Mesa3D;
133  }
134 
/// \returns true if the OS is Mesa3D and AMDGPU::isShader is false for the
/// calling convention of \p F.
135  bool isMesaKernel(const Function &F) const {
     if (!isMesa3DOS())
       return false;
136  return !AMDGPU::isShader(F.getCallingConv());
137  }
138 
/// \returns true if the OS is AMDHSA, or if \p F is a Mesa kernel (see
/// isMesaKernel).
139  bool isAmdHsaOrMesa(const Function &F) const {
     if (isAmdHsaOS())
       return true;
140  return isMesaKernel(F);
141  }
142 
143  bool has16BitInsts() const {
144  return Has16BitInsts;
145  }
146 
147  bool hasMadMixInsts() const {
148  return HasMadMixInsts;
149  }
150 
151  bool hasFP32Denormals() const {
152  return FP32Denormals;
153  }
154 
155  bool hasFPExceptions() const {
156  return FPExceptions;
157  }
158 
159  bool hasSDWA() const {
160  return HasSDWA;
161  }
162 
163  bool hasVOP3PInsts() const {
164  return HasVOP3PInsts;
165  }
166 
167  bool hasMulI24() const {
168  return HasMulI24;
169  }
170 
171  bool hasMulU24() const {
172  return HasMulU24;
173  }
174 
175  bool hasInv2PiInlineImm() const {
176  return HasInv2PiInlineImm;
177  }
178 
179  bool hasFminFmaxLegacy() const {
180  return HasFminFmaxLegacy;
181  }
182 
183  bool hasTrigReducedRange() const {
184  return HasTrigReducedRange;
185  }
186 
187  bool isPromoteAllocaEnabled() const {
188  return EnablePromoteAlloca;
189  }
190 
191  unsigned getWavefrontSize() const {
192  return WavefrontSize;
193  }
194 
195  int getLocalMemorySize() const {
196  return LocalMemorySize;
197  }
198 
/// \returns Alignment used for the implicit argument pointer: 8 on AMDHSA,
/// 4 on every other OS.
199  unsigned getAlignmentForImplicitArgPtr() const {
     if (isAmdHsaOS())
       return 8;
200  return 4;
201  }
202 
203  /// \returns Byte offset, measured from the start of the input buffer, of
204  /// the first explicit kernel argument: 0 when isAmdHsaOrMesa(\p F) holds,
     /// 36 otherwise.
205  unsigned getExplicitKernelArgOffset(const Function &F) const {
     if (isAmdHsaOrMesa(F))
       return 0;
206  return 36;
207  }
208 
209  /// \returns Maximum number of work groups per compute unit supported by the
210  /// subtarget and limited by given \p FlatWorkGroupSize.
211  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
212 
213  /// \returns Minimum flat work group size supported by the subtarget.
214  virtual unsigned getMinFlatWorkGroupSize() const = 0;
215 
216  /// \returns Maximum flat work group size supported by the subtarget.
217  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
218 
219  /// \returns Maximum number of waves per execution unit supported by the
220  /// subtarget and limited by given \p FlatWorkGroupSize.
221  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
222 
223  /// \returns Minimum number of waves per execution unit supported by the
224  /// subtarget.
225  virtual unsigned getMinWavesPerEU() const = 0;
226 
227  /// \returns Maximum number of waves per execution unit supported by the
228  /// subtarget without any kind of limitation.
229  unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
230 
231  /// Creates value range metadata on a workitemid.* intrinsic call or load.
232  bool makeLIDRangeMetadata(Instruction *I) const;
233 
234  /// \returns Size in bytes of the arguments passed to a shader or kernel on
235  /// top of the explicit ones declared for \p F: 16 for Mesa kernels,
     /// otherwise the "amdgpu-implicitarg-num-bytes" attribute of \p F (the
     /// trailing 0 is presumably the fallback value -- confirm against
     /// AMDGPU::getIntegerAttribute).
236  unsigned getImplicitArgNumBytes(const Function &F) const {
237  return isMesaKernel(F)
238  ? 16
239  : AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
240  }
241  uint64_t getExplicitKernArgSize(const Function &F,
242  unsigned &MaxAlign) const;
243  unsigned getKernArgSegmentSize(const Function &F,
244  unsigned &MaxAlign) const;
245 
/// Virtual destructor with a compiler-generated body.
246  virtual ~AMDGPUSubtarget() = default;
247 };
248 
250  public AMDGPUSubtarget {
251 
253 
254 public:
256  TrapHandlerAbiNone = 0,
257  TrapHandlerAbiHsa = 1
258  };
259 
260  enum TrapID {
261  TrapIDHardwareReserved = 0,
262  TrapIDHSADebugTrap = 1,
263  TrapIDLLVMTrap = 2,
264  TrapIDLLVMDebugTrap = 3,
265  TrapIDDebugBreakpoint = 7,
266  TrapIDDebugReserved8 = 8,
267  TrapIDDebugReservedFE = 0xfe,
268  TrapIDDebugReservedFF = 0xff
269  };
270 
272  LLVMTrapHandlerRegValue = 1
273  };
274 
275 private:
276  /// GlobalISel related APIs.
277  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
278  std::unique_ptr<InstructionSelector> InstSelector;
279  std::unique_ptr<LegalizerInfo> Legalizer;
280  std::unique_ptr<RegisterBankInfo> RegBankInfo;
281 
282 protected:
283  // Basic subtarget description.
285  unsigned Gen;
289 
290  // Possibly statically set by tablegen, but may want to be overridden.
293 
294  // Dynamically set bits that enable features.
306 
307  // Used as options.
313  bool DumpCode;
314 
315  // Subtarget properties statically set by tablegen
316  bool FP64;
317  bool FMA;
318  bool MIMG_R128;
319  bool IsGCN;
321  bool CIInsts;
322  bool GFX8Insts;
323  bool GFX9Insts;
330  bool HasMovrel;
339  bool HasDPP;
340  bool HasDPP8;
356  bool HasVscnt;
368  bool CaymanISA;
369  bool CFALUBug;
375 
385 
386  // Dummy feature to use for assembler in tablegen.
388 
390 private:
391  SIInstrInfo InstrInfo;
392  SITargetLowering TLInfo;
393  SIFrameLowering FrameLowering;
394 
395  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
     // The value below is the largest encodable size in bytes: the maximum
     // 13-bit field value (2^13 - 1) times 256 dwords of 4 bytes each.
396  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
397 
398 public:
399  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
400  const GCNTargetMachine &TM);
401  ~GCNSubtarget() override;
402 
403  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
404  StringRef GPU, StringRef FS);
405 
406  const SIInstrInfo *getInstrInfo() const override {
407  return &InstrInfo;
408  }
409 
410  const SIFrameLowering *getFrameLowering() const override {
411  return &FrameLowering;
412  }
413 
414  const SITargetLowering *getTargetLowering() const override {
415  return &TLInfo;
416  }
417 
418  const SIRegisterInfo *getRegisterInfo() const override {
419  return &InstrInfo.getRegisterInfo();
420  }
421 
422  const CallLowering *getCallLowering() const override {
423  return CallLoweringInfo.get();
424  }
425 
427  return InstSelector.get();
428  }
429 
430  const LegalizerInfo *getLegalizerInfo() const override {
431  return Legalizer.get();
432  }
433 
434  const RegisterBankInfo *getRegBankInfo() const override {
435  return RegBankInfo.get();
436  }
437 
438  // Nothing implemented, just prevent crashes on use.
439  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
440  return &TSInfo;
441  }
442 
443  const InstrItineraryData *getInstrItineraryData() const override {
444  return &InstrItins;
445  }
446 
447  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
448 
450  return (Generation)Gen;
451  }
452 
453  unsigned getWavefrontSizeLog2() const {
454  return Log2_32(WavefrontSize);
455  }
456 
457  /// Return the number of high bits known to be zero for a frame index.
459  return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
460  }
461 
462  int getLDSBankCount() const {
463  return LDSBankCount;
464  }
465 
466  unsigned getMaxPrivateElementSize() const {
467  return MaxPrivateElementSize;
468  }
469 
470  unsigned getConstantBusLimit(unsigned Opcode) const;
471 
472  bool hasIntClamp() const {
473  return HasIntClamp;
474  }
475 
476  bool hasFP64() const {
477  return FP64;
478  }
479 
480  bool hasMIMG_R128() const {
481  return MIMG_R128;
482  }
483 
484  bool hasHWFP64() const {
485  return FP64;
486  }
487 
488  bool hasFastFMAF32() const {
489  return FastFMAF32;
490  }
491 
492  bool hasHalfRate64Ops() const {
493  return HalfRate64Ops;
494  }
495 
/// \returns true for generations that precede VOLCANIC_ISLANDS.
496  bool hasAddr64() const {
     const auto CurGen = getGeneration();
497  return CurGen < AMDGPUSubtarget::VOLCANIC_ISLANDS;
498  }
499 
500  // True when only the reversed-operand forms of the VALU shift instructions
501  // exist on the target (e.g. v_lshrrev_b32 but no v_lshr_b32), which is the
     // case from VOLCANIC_ISLANDS onwards.
502  bool hasOnlyRevVALUShifts() const {
503  return !(getGeneration() < VOLCANIC_ISLANDS);
504  }
505 
506  bool hasBFE() const {
507  return true;
508  }
509 
510  bool hasBFI() const {
511  return true;
512  }
513 
514  bool hasBFM() const {
515  return hasBFE();
516  }
517 
518  bool hasBCNT(unsigned Size) const {
519  return true;
520  }
521 
522  bool hasFFBL() const {
523  return true;
524  }
525 
526  bool hasFFBH() const {
527  return true;
528  }
529 
530  bool hasMed3_16() const {
531  return getGeneration() >= AMDGPUSubtarget::GFX9;
532  }
533 
534  bool hasMin3Max3_16() const {
535  return getGeneration() >= AMDGPUSubtarget::GFX9;
536  }
537 
538  bool hasFmaMixInsts() const {
539  return HasFmaMixInsts;
540  }
541 
542  bool hasCARRY() const {
543  return true;
544  }
545 
546  bool hasFMA() const {
547  return FMA;
548  }
549 
550  bool hasSwap() const {
551  return GFX9Insts;
552  }
553 
554  bool hasScalarPackInsts() const {
555  return GFX9Insts;
556  }
557 
559  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
560  }
561 
562  /// True if the offset field of DS instructions behaves as expected, i.e. on
563  /// SEA_ISLANDS and later. On SI the offset uses a 16-bit adder and does not
     /// always wrap properly.
564  bool hasUsableDSOffset() const {
565  return !(getGeneration() < SEA_ISLANDS);
566  }
567 
569  return EnableUnsafeDSOffsetFolding;
570  }
571 
572  /// Condition output from div_scale is usable.
574  return getGeneration() != SOUTHERN_ISLANDS;
575  }
576 
577  /// An extra wait hazard is needed in some cases before
578  /// s_cbranch_vccnz/s_cbranch_vccz. Reported for every generation up to and
     /// including SEA_ISLANDS.
579  bool hasReadVCCZBug() const {
580  return !(getGeneration() > SEA_ISLANDS);
581  }
582 
583  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
584  /// was written by a VALU instruction.
586  return getGeneration() == SOUTHERN_ISLANDS;
587  }
588 
589  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
590  /// SGPR was written by a VALU Instruction.
592  return getGeneration() >= VOLCANIC_ISLANDS;
593  }
594 
595  bool hasRFEHazards() const {
596  return getGeneration() >= VOLCANIC_ISLANDS;
597  }
598 
599  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32: 1 up to
     /// and including SEA_ISLANDS, 2 on later generations.
600  unsigned getSetRegWaitStates() const {
     if (getGeneration() <= SEA_ISLANDS)
       return 1;
601  return 2;
602  }
603 
604  bool dumpCode() const {
605  return DumpCode;
606  }
607 
608  /// Return the amount of LDS that can be used that will not restrict the
609  /// occupancy lower than WaveCount.
610  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
611  const Function &) const;
612 
613  bool hasFP16Denormals() const {
614  return FP64FP16Denormals;
615  }
616 
617  bool hasFP64Denormals() const {
618  return FP64FP16Denormals;
619  }
620 
622  return getGeneration() >= AMDGPUSubtarget::GFX9;
623  }
624 
625  /// \returns If target supports S_DENORM_MODE.
626  bool hasDenormModeInst() const {
627  return getGeneration() >= AMDGPUSubtarget::GFX10;
628  }
629 
630  bool useFlatForGlobal() const {
631  return FlatForGlobal;
632  }
633 
634  /// \returns true if the target supports ds_read/write_b128 (CIInsts) and the
635  /// user has enabled generation of ds_read/write_b128 (EnableDS128).
636  bool useDS128() const {
     if (!CIInsts)
       return false;
637  return EnableDS128;
638  }
639 
640  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
641  bool haveRoundOpsF64() const {
642  return CIInsts;
643  }
644 
645  /// \returns If MUBUF instructions always perform range checking, even for
646  /// buffer resources used for private memory access.
648  return getGeneration() < AMDGPUSubtarget::GFX9;
649  }
650 
651  /// \returns If target requires PRT Strict NULL support (zero result registers
652  /// for sparse texture support).
653  bool usePRTStrictNull() const {
654  return EnablePRTStrictNull;
655  }
656 
658  return AutoWaitcntBeforeBarrier;
659  }
660 
/// \returns The CodeObjectV3 setting on AMDHSA; always false on other OSes.
661  bool hasCodeObjectV3() const {
662  // FIXME: Need to add code object v3 support for mesa and pal.
     if (!isAmdHsaOS())
       return false;
663  return CodeObjectV3;
664  }
665 
667  return UnalignedBufferAccess;
668  }
669 
671  return UnalignedScratchAccess;
672  }
673 
674  bool hasApertureRegs() const {
675  return HasApertureRegs;
676  }
677 
678  bool isTrapHandlerEnabled() const {
679  return TrapHandler;
680  }
681 
682  bool isXNACKEnabled() const {
683  return EnableXNACK;
684  }
685 
686  bool isCuModeEnabled() const {
687  return EnableCuMode;
688  }
689 
690  bool hasFlatAddressSpace() const {
691  return FlatAddressSpace;
692  }
693 
694  bool hasFlatScrRegister() const {
695  return hasFlatAddressSpace();
696  }
697 
698  bool hasFlatInstOffsets() const {
699  return FlatInstOffsets;
700  }
701 
702  bool hasFlatGlobalInsts() const {
703  return FlatGlobalInsts;
704  }
705 
706  bool hasFlatScratchInsts() const {
707  return FlatScratchInsts;
708  }
709 
711  return ScalarFlatScratchInsts;
712  }
713 
714  bool hasFlatSegmentOffsetBug() const {
715  return HasFlatSegmentOffsetBug;
716  }
717 
719  return getGeneration() > GFX9;
720  }
721 
722  bool hasD16LoadStore() const {
723  return getGeneration() >= GFX9;
724  }
725 
/// \returns true if D16 loads/stores are available and SRAM ECC is not
/// enabled.
726  bool d16PreservesUnusedBits() const {
     if (!hasD16LoadStore())
       return false;
727  return !isSRAMECCEnabled();
728  }
729 
730  bool hasD16Images() const {
731  return getGeneration() >= VOLCANIC_ISLANDS;
732  }
733 
734  /// Return true if most LDS instructions have an m0 use that requires m0 to
735  /// be initialized.
736  bool ldsRequiresM0Init() const {
737  return getGeneration() < GFX9;
738  }
739 
740  // True if the hardware rewinds and replays GWS operations if a wave is
741  // preempted.
742  //
743  // If this is false, a GWS operation requires testing if a nack set the
744  // MEM_VIOL bit, and repeating if so.
745  bool hasGWSAutoReplay() const {
746  return getGeneration() >= GFX9;
747  }
748 
749  /// \returns if target has ds_gws_sema_release_all instruction.
750  bool hasGWSSemaReleaseAll() const {
751  return CIInsts;
752  }
753 
754  bool hasAddNoCarry() const {
755  return AddNoCarryInsts;
756  }
757 
758  bool hasUnpackedD16VMem() const {
759  return HasUnpackedD16VMem;
760  }
761 
762  // Covers VS/PS/CS graphics shaders: true when the OS is Mesa3D and
     // AMDGPU::isShader accepts the calling convention of F.
763  bool isMesaGfxShader(const Function &F) const {
     if (!isMesa3DOS())
       return false;
764  return AMDGPU::isShader(F.getCallingConv());
765  }
766 
767  bool hasMad64_32() const {
768  return getGeneration() >= SEA_ISLANDS;
769  }
770 
771  bool hasSDWAOmod() const {
772  return HasSDWAOmod;
773  }
774 
775  bool hasSDWAScalar() const {
776  return HasSDWAScalar;
777  }
778 
779  bool hasSDWASdst() const {
780  return HasSDWASdst;
781  }
782 
783  bool hasSDWAMac() const {
784  return HasSDWAMac;
785  }
786 
787  bool hasSDWAOutModsVOPC() const {
788  return HasSDWAOutModsVOPC;
789  }
790 
791  bool hasDLInsts() const {
792  return HasDLInsts;
793  }
794 
795  bool hasDot1Insts() const {
796  return HasDot1Insts;
797  }
798 
799  bool hasDot2Insts() const {
800  return HasDot2Insts;
801  }
802 
803  bool hasDot3Insts() const {
804  return HasDot3Insts;
805  }
806 
807  bool hasDot4Insts() const {
808  return HasDot4Insts;
809  }
810 
811  bool hasDot5Insts() const {
812  return HasDot5Insts;
813  }
814 
815  bool hasDot6Insts() const {
816  return HasDot6Insts;
817  }
818 
819  bool hasMAIInsts() const {
820  return HasMAIInsts;
821  }
822 
823  bool hasPkFmacF16Inst() const {
824  return HasPkFmacF16Inst;
825  }
826 
827  bool hasAtomicFaddInsts() const {
828  return HasAtomicFaddInsts;
829  }
830 
831  bool isSRAMECCEnabled() const {
832  return EnableSRAMECC;
833  }
834 
835  bool hasNoSdstCMPX() const {
836  return HasNoSdstCMPX;
837  }
838 
839  bool hasVscnt() const {
840  return HasVscnt;
841  }
842 
843  bool hasRegisterBanking() const {
844  return HasRegisterBanking;
845  }
846 
847  bool hasVOP3Literal() const {
848  return HasVOP3Literal;
849  }
850 
851  bool hasNoDataDepHazard() const {
852  return HasNoDataDepHazard;
853  }
854 
856  return getGeneration() < SEA_ISLANDS;
857  }
858 
859  // Scratch is allocated in 256 dword per wave blocks for the entire
860  // wavefront. When viewed from the perspective of an arbitrary workitem, this
861  // is 4-byte aligned.
862  //
863  // Only 4-byte alignment is really needed to access anything. Transformations
864  // on the pointer value itself may rely on the alignment / known low bits of
865  // the pointer. Set this to something above the minimum to avoid needing
866  // dynamic realignment in common cases.
867  unsigned getStackAlignment() const {
868  return 16;
869  }
870 
871  bool enableMachineScheduler() const override {
872  return true;
873  }
874 
875  bool enableSubRegLiveness() const override {
876  return true;
877  }
878 
879  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
881 
882  /// \returns Number of execution units per compute unit supported by the
883  /// subtarget.
884  unsigned getEUsPerCU() const {
885  return AMDGPU::IsaInfo::getEUsPerCU(this);
886  }
887 
888  /// \returns Maximum number of waves per compute unit supported by the
889  /// subtarget without any kind of limitation.
890  unsigned getMaxWavesPerCU() const {
892  }
893 
894  /// \returns Maximum number of waves per compute unit supported by the
895  /// subtarget and limited by given \p FlatWorkGroupSize.
896  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
897  return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
898  }
899 
900  /// \returns Number of waves per work group supported by the subtarget and
901  /// limited by given \p FlatWorkGroupSize.
902  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
903  return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
904  }
905 
906  // static wrappers
907  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
908 
909  // XXX - Why is this here if it isn't in the default pass set?
910  bool enableEarlyIfConversion() const override {
911  return true;
912  }
913 
914  void overrideSchedPolicy(MachineSchedPolicy &Policy,
915  unsigned NumRegionInstrs) const override;
916 
917  unsigned getMaxNumUserSGPRs() const {
918  return 16;
919  }
920 
921  bool hasSMemRealTime() const {
922  return HasSMemRealTime;
923  }
924 
925  bool hasMovrel() const {
926  return HasMovrel;
927  }
928 
929  bool hasVGPRIndexMode() const {
930  return HasVGPRIndexMode;
931  }
932 
/// \returns true if the target has no movrel, or if \p UserEnable is set and
/// the target has VGPR index mode.
933  bool useVGPRIndexMode(bool UserEnable) const {
     if (!hasMovrel())
       return true;
934  return UserEnable && hasVGPRIndexMode();
935  }
936 
937  bool hasScalarCompareEq64() const {
938  return getGeneration() >= VOLCANIC_ISLANDS;
939  }
940 
941  bool hasScalarStores() const {
942  return HasScalarStores;
943  }
944 
945  bool hasScalarAtomics() const {
946  return HasScalarAtomics;
947  }
948 
949  bool hasLDSFPAtomics() const {
950  return GFX8Insts;
951  }
952 
953  bool hasDPP() const {
954  return HasDPP;
955  }
956 
/// \returns true if the target has DPP and its generation precedes GFX10.
957  bool hasDPPBroadcasts() const {
     if (!HasDPP)
       return false;
958  return getGeneration() < GFX10;
959  }
960 
/// \returns true if the target has DPP and its generation precedes GFX10.
961  bool hasDPPWavefrontShifts() const {
     if (!HasDPP)
       return false;
962  return getGeneration() < GFX10;
963  }
964 
965  bool hasDPP8() const {
966  return HasDPP8;
967  }
968 
969  bool hasR128A16() const {
970  return HasR128A16;
971  }
972 
973  bool hasOffset3fBug() const {
974  return HasOffset3fBug;
975  }
976 
977  bool hasNSAEncoding() const {
978  return HasNSAEncoding;
979  }
980 
981  bool hasMadF16() const;
982 
983  bool enableSIScheduler() const {
984  return EnableSIScheduler;
985  }
986 
987  bool loadStoreOptEnabled() const {
988  return EnableLoadStoreOpt;
989  }
990 
991  bool hasSGPRInitBug() const {
992  return SGPRInitBug;
993  }
994 
995  bool hasMFMAInlineLiteralBug() const {
996  return HasMFMAInlineLiteralBug;
997  }
998 
/// \returns true for every generation except SOUTHERN_ISLANDS.
999  bool has12DWordStoreHazard() const {
1000  return !(getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS);
1001  }
1002 
1003  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
1004  bool hasDwordx3LoadStores() const {
1005  return CIInsts;
1006  }
1007 
1008  bool hasSMovFedHazard() const {
1009  return getGeneration() == AMDGPUSubtarget::GFX9;
1010  }
1011 
1013  return getGeneration() == AMDGPUSubtarget::GFX9;
1014  }
1015 
/// \returns true for generations from VOLCANIC_ISLANDS through GFX9,
/// inclusive.
1016  bool hasReadM0SendMsgHazard() const {
1017  if (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
        return false;
1018  return getGeneration() <= AMDGPUSubtarget::GFX9;
1019  }
1020 
1021  bool hasVcmpxPermlaneHazard() const {
1022  return HasVcmpxPermlaneHazard;
1023  }
1024 
1026  return HasVMEMtoScalarWriteHazard;
1027  }
1028 
1030  return HasSMEMtoVectorWriteHazard;
1031  }
1032 
/// \returns true if LDSMisalignedBug is set and CU mode is not enabled.
1033  bool hasLDSMisalignedBug() const {
      if (!LDSMisalignedBug)
        return false;
1034  return !EnableCuMode;
1035  }
1036 
1037  bool hasInstFwdPrefetchBug() const {
1038  return HasInstFwdPrefetchBug;
1039  }
1040 
1041  bool hasVcmpxExecWARHazard() const {
1042  return HasVcmpxExecWARHazard;
1043  }
1044 
1046  return HasLdsBranchVmemWARHazard;
1047  }
1048 
1049  bool hasNSAtoVMEMBug() const {
1050  return HasNSAtoVMEMBug;
1051  }
1052 
1053  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1054  /// SGPRs
1055  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1056 
1057  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1058  /// VGPRs
1059  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1060 
1061  /// Return occupancy for the given function. Takes the LDS size and the
1062  /// register counts into account if they are provided.
1063  /// Note, occupancy can be affected by the scratch allocation as well, but
1064  /// we do not have enough information to compute it.
1065  unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0,
1066  unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1067 
1068  /// \returns true if the flat_scratch register should be initialized with the
1069  /// pointer to the wave's scratch memory rather than a size and offset.
1070  bool flatScratchIsPointer() const {
1071  return getGeneration() >= AMDGPUSubtarget::GFX9;
1072  }
1073 
1074  /// \returns true if the machine has merged shaders in which s0-s7 are
1075  /// reserved by the hardware and user SGPRs start at s8
1076  bool hasMergedShaders() const {
1077  return getGeneration() >= GFX9;
1078  }
1079 
1080  /// \returns SGPR allocation granularity supported by the subtarget.
1081  unsigned getSGPRAllocGranule() const {
1083  }
1084 
1085  /// \returns SGPR encoding granularity supported by the subtarget.
1086  unsigned getSGPREncodingGranule() const {
1088  }
1089 
1090  /// \returns Total number of SGPRs supported by the subtarget.
1091  unsigned getTotalNumSGPRs() const {
1092  return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1093  }
1094 
1095  /// \returns Addressable number of SGPRs supported by the subtarget.
1096  unsigned getAddressableNumSGPRs() const {
1098  }
1099 
1100  /// \returns Minimum number of SGPRs that meets the given number of waves per
1101  /// execution unit requirement supported by the subtarget.
1102  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1103  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1104  }
1105 
1106  /// \returns Maximum number of SGPRs that meets the given number of waves per
1107  /// execution unit requirement supported by the subtarget.
1108  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1109  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1110  }
1111 
1112  /// \returns Reserved number of SGPRs for given function \p MF.
1113  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1114 
1115  /// \returns Maximum number of SGPRs that meets number of waves per execution
1116  /// unit requirement for function \p MF, or number of SGPRs explicitly
1117  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1118  ///
1119  /// \returns Value that meets number of waves per execution unit requirement
1120  /// if explicitly requested value cannot be converted to integer, violates
1121  /// subtarget's specifications, or does not meet number of waves per execution
1122  /// unit requirement.
1123  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1124 
1125  /// \returns VGPR allocation granularity supported by the subtarget.
1126  unsigned getVGPRAllocGranule() const {
1128  }
1129 
1130  /// \returns VGPR encoding granularity supported by the subtarget.
1131  unsigned getVGPREncodingGranule() const {
1133  }
1134 
1135  /// \returns Total number of VGPRs supported by the subtarget.
1136  unsigned getTotalNumVGPRs() const {
1137  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1138  }
1139 
1140  /// \returns Addressable number of VGPRs supported by the subtarget.
1141  unsigned getAddressableNumVGPRs() const {
1143  }
1144 
1145  /// \returns Minimum number of VGPRs that meets given number of waves per
1146  /// execution unit requirement supported by the subtarget.
1147  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1148  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1149  }
1150 
1151  /// \returns Maximum number of VGPRs that meets given number of waves per
1152  /// execution unit requirement supported by the subtarget.
1153  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1154  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1155  }
1156 
1157  /// \returns Maximum number of VGPRs that meets number of waves per execution
1158  /// unit requirement for function \p MF, or number of VGPRs explicitly
1159  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1160  ///
1161  /// \returns Value that meets number of waves per execution unit requirement
1162  /// if explicitly requested value cannot be converted to integer, violates
1163  /// subtarget's specifications, or does not meet number of waves per execution
1164  /// unit requirement.
1165  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1166 
1167  void getPostRAMutations(
1168  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1169  const override;
1170 
1171  bool isWave32() const {
1172  return WavefrontSize == 32;
1173  }
1174 
1176  return getRegisterInfo()->getBoolRC();
1177  }
1178 
1179  /// \returns Maximum number of work groups per compute unit supported by the
1180  /// subtarget and limited by given \p FlatWorkGroupSize.
1181  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1182  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1183  }
1184 
1185  /// \returns Minimum flat work group size supported by the subtarget.
1186  unsigned getMinFlatWorkGroupSize() const override {
1188  }
1189 
1190  /// \returns Maximum flat work group size supported by the subtarget.
1191  unsigned getMaxFlatWorkGroupSize() const override {
1193  }
1194 
1195  /// \returns Maximum number of waves per execution unit supported by the
1196  /// subtarget and limited by given \p FlatWorkGroupSize.
1197  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1198  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1199  }
1200 
1201  /// \returns Minimum number of waves per execution unit supported by the
1202  /// subtarget.
1203  unsigned getMinWavesPerEU() const override {
1204  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1205  }
1206 };
1207 
1209  public AMDGPUSubtarget {
1210 private:
1211  R600InstrInfo InstrInfo;
1212  R600FrameLowering FrameLowering;
1213  bool FMA;
1214  bool CaymanISA;
1215  bool CFALUBug;
1216  bool HasVertexCache;
1217  bool R600ALUInst;
1218  bool FP64;
1219  short TexVTXClauseSize;
1220  Generation Gen;
1221  R600TargetLowering TLInfo;
1222  InstrItineraryData InstrItins;
1223  SelectionDAGTargetInfo TSInfo;
1224 
1225 public:
1226  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
1227  const TargetMachine &TM);
1228 
1229  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
1230 
1231  const R600FrameLowering *getFrameLowering() const override {
1232  return &FrameLowering;
1233  }
1234 
1235  const R600TargetLowering *getTargetLowering() const override {
1236  return &TLInfo;
1237  }
1238 
1239  const R600RegisterInfo *getRegisterInfo() const override {
1240  return &InstrInfo.getRegisterInfo();
1241  }
1242 
1243  const InstrItineraryData *getInstrItineraryData() const override {
1244  return &InstrItins;
1245  }
1246 
1247  // Nothing implemented, just prevent crashes on use.
1249  return &TSInfo;
1250  }
1251 
1252  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1253 
1255  return Gen;
1256  }
1257 
1258  unsigned getStackAlignment() const {
1259  return 4;
1260  }
1261 
1262  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1263  StringRef GPU, StringRef FS);
1264 
1265  bool hasBFE() const {
1266  return (getGeneration() >= EVERGREEN);
1267  }
1268 
1269  bool hasBFI() const {
1270  return (getGeneration() >= EVERGREEN);
1271  }
1272 
/// \returns true only when \p Size is 32 and the generation is EVERGREEN or
/// later; false for every other \p Size.
1273  bool hasBCNT(unsigned Size) const {
1274  return Size == 32 && getGeneration() >= EVERGREEN;
1278  }
1279 
1280  bool hasBORROW() const {
1281  return (getGeneration() >= EVERGREEN);
1282  }
1283 
1284  bool hasCARRY() const {
1285  return (getGeneration() >= EVERGREEN);
1286  }
1287 
1288  bool hasCaymanISA() const {
1289  return CaymanISA;
1290  }
1291 
1292  bool hasFFBL() const {
1293  return (getGeneration() >= EVERGREEN);
1294  }
1295 
1296  bool hasFFBH() const {
1297  return (getGeneration() >= EVERGREEN);
1298  }
1299 
1300  bool hasFMA() const { return FMA; }
1301 
1302  bool hasCFAluBug() const { return CFALUBug; }
1303 
1304  bool hasVertexCache() const { return HasVertexCache; }
1305 
1306  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1307 
1308  bool enableMachineScheduler() const override {
1309  return true;
1310  }
1311 
1312  bool enableSubRegLiveness() const override {
1313  return true;
1314  }
1315 
1316  /// \returns Maximum number of work groups per compute unit supported by the
1317  /// subtarget and limited by given \p FlatWorkGroupSize.
1318  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1319  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1320  }
1321 
1322  /// \returns Minimum flat work group size supported by the subtarget.
1323  unsigned getMinFlatWorkGroupSize() const override {
1325  }
1326 
1327  /// \returns Maximum flat work group size supported by the subtarget.
1328  unsigned getMaxFlatWorkGroupSize() const override {
1330  }
1331 
1332  /// \returns Maximum number of waves per execution unit supported by the
1333  /// subtarget and limited by given \p FlatWorkGroupSize.
1334  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1335  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1336  }
1337 
1338  /// \returns Minimum number of waves per execution unit supported by the
1339  /// subtarget.
1340  unsigned getMinWavesPerEU() const override {
1341  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1342  }
1343 };
1344 
1345 } // end namespace llvm
1346 
1347 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasVscnt() const
bool hasSDWAOmod() const
bool hasLDSMisalignedBug() const
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool hasVOP3Literal() const
bool hasPkFmacF16Inst() const
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
bool hasApertureRegs() const
bool hasScalarPackInsts() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
bool hasFlatScrRegister() const
bool isMesaKernel(const Function &F) const
unsigned getMinFlatWorkGroupSize() const override
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:305
bool isPromoteAllocaEnabled() const
bool d16PreservesUnusedBits() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getSGPRAllocGranule() const
bool hasNSAtoVMEMBug() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasAtomicFaddInsts() const
bool hasTrigReducedRange() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool hasVcmpxPermlaneHazard() const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
virtual unsigned getMinWavesPerEU() const =0
F(f)
InstrItineraryData InstrItins
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasInstFwdPrefetchBug() const
bool hasFastFMAF32() const
Generation getGeneration() const
bool hasFlatSegmentOffsetBug() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:171
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasDPPWavefrontShifts() const
bool hasFP64Denormals() const
Holds all the information related to register banks.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
bool hasDwordx3LoadStores() const
bool hasIntClamp() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasLdsBranchVmemWARHazard() const
bool hasD16Images() const
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
bool isTrapHandlerEnabled() const
bool hasDot4Insts() const
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasSMemRealTime() const
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
const TargetRegisterClass * getBoolRC() const
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasHalfRate64Ops() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
bool useFlatForGlobal() const
unsigned getAddressableNumSGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinWavesPerEU() const override
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
Itinerary data supplied by a subtarget to be used by a target.
bool hasVMEMtoScalarWriteHazard() const
bool hasAddNoCarry() const
bool hasDot3Insts() const
const CallLowering * getCallLowering() const override
virtual unsigned getMinFlatWorkGroupSize() const =0
bool hasNoDataDepHazard() const
bool dumpCode() const
bool hasDot6Insts() const
bool isSRAMECCEnabled() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
bool hasDot2Insts() const
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMaxWavesPerCU() const
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasCFAluBug() const
unsigned getStackAlignment() const
bool hasFminFmaxLegacy() const
bool hasDLInsts() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasNSAEncoding() const
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasSwap() const
bool hasMovrel() const
unsigned MaxPrivateElementSize
bool usePRTStrictNull() const
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
bool isCuModeEnabled() const
bool hasLDSFPAtomics() const
bool hasSMEMtoVectorWriteHazard() const
const R600InstrInfo * getInstrInfo() const override
bool hasRegisterBanking() const
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:71
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
bool hasGWSAutoReplay() const
bool hasOnlyRevVALUShifts() const
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:188
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
bool hasScalarAtomics() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasOffset3fBug() const
bool hasVcmpxExecWARHazard() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
bool hasGWSSemaReleaseAll() const
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDenormModeInst() const
bool enableSIScheduler() const
bool hasRFEHazards() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
bool hasMadMixInsts() const
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasMFMAInlineLiteralBug() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
bool hasScalarFlatScratchInsts() const
unsigned getAlignmentForImplicitArgPtr() const
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasMed3_16() const
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
int getLDSBankCount() const
bool hasDPPBroadcasts() const
bool hasBCNT(unsigned Size) const
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool hasDot5Insts() const
bool enableSubRegLiveness() const override
bool hasNoSdstCMPX() const
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
R600 DAG Lowering interface definition.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(const Triple &TT)
unsigned getTotalNumVGPRs() const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:46
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getStackAlignment() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool hasSDWAScalar() const
const InstrItineraryData * getInstrItineraryData() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
bool hasDot1Insts() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:333
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasMAIInsts() const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool hasDPP8() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
InstructionSelector * getInstructionSelector() const override
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasR128A16() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override