LLVM  9.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "AMDGPU.h"
18 #include "AMDGPUCallLowering.h"
19 #include "R600FrameLowering.h"
20 #include "R600ISelLowering.h"
21 #include "R600InstrInfo.h"
22 #include "SIFrameLowering.h"
23 #include "SIISelLowering.h"
24 #include "SIInstrInfo.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/ADT/Triple.h"
34 #include <cassert>
35 #include <cstdint>
36 #include <memory>
37 #include <utility>
38 
39 #define GET_SUBTARGETINFO_HEADER
40 #include "AMDGPUGenSubtargetInfo.inc"
41 #define GET_SUBTARGETINFO_HEADER
42 #include "R600GenSubtargetInfo.inc"
43 
44 namespace llvm {
45 
46 class StringRef;
47 
49 public:
50  enum Generation {
51  R600 = 0,
52  R700 = 1,
53  EVERGREEN = 2,
58  GFX9 = 7,
59  GFX10 = 8
60  };
61 
62 private:
63  Triple TargetTriple;
64 
65 protected:
70  bool HasSDWA;
72  bool HasMulI24;
73  bool HasMulU24;
79  unsigned WavefrontSize;
80 
81 public:
82  AMDGPUSubtarget(const Triple &TT);
83 
84  static const AMDGPUSubtarget &get(const MachineFunction &MF);
85  static const AMDGPUSubtarget &get(const TargetMachine &TM,
86  const Function &F);
87 
88  /// \returns Default flat work group size range for a calling convention.
89  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
90 
91  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
92  /// for function \p F, or minimum/maximum flat work group sizes explicitly
93  /// requested using "amdgpu-flat-work-group-size" attribute attached to
94  /// function \p F.
95  ///
96  /// \returns Subtarget's default values if explicitly requested values cannot
97  /// be converted to integer, or violate subtarget's specifications.
98  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
99 
100  /// \returns Subtarget's default pair of minimum/maximum number of waves per
101  /// execution unit for function \p F, or minimum/maximum number of waves per
102  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
103  /// attached to function \p F.
104  ///
105  /// \returns Subtarget's default values if explicitly requested values cannot
106  /// be converted to integer, violate subtarget's specifications, or are not
107  /// compatible with minimum/maximum number of waves limited by flat work group
108  /// size, register usage, and/or lds usage.
109  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
110 
111  /// Return the amount of LDS that can be used that will not restrict the
112  /// occupancy lower than WaveCount.
113  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
114  const Function &) const;
115 
116  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wave count
117  /// if the given LDS memory size is the only constraint.
118  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
119 
120  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
121 
122  bool isAmdHsaOS() const {
123  return TargetTriple.getOS() == Triple::AMDHSA;
124  }
125 
126  bool isAmdPalOS() const {
127  return TargetTriple.getOS() == Triple::AMDPAL;
128  }
129 
130  bool isMesa3DOS() const {
131  return TargetTriple.getOS() == Triple::Mesa3D;
132  }
133 
134  bool isMesaKernel(const Function &F) const {
135  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
136  }
137 
138  bool isAmdHsaOrMesa(const Function &F) const {
139  return isAmdHsaOS() || isMesaKernel(F);
140  }
141 
142  bool has16BitInsts() const {
143  return Has16BitInsts;
144  }
145 
146  bool hasMadMixInsts() const {
147  return HasMadMixInsts;
148  }
149 
150  bool hasFP32Denormals() const {
151  return FP32Denormals;
152  }
153 
154  bool hasFPExceptions() const {
155  return FPExceptions;
156  }
157 
158  bool hasSDWA() const {
159  return HasSDWA;
160  }
161 
162  bool hasVOP3PInsts() const {
163  return HasVOP3PInsts;
164  }
165 
166  bool hasMulI24() const {
167  return HasMulI24;
168  }
169 
170  bool hasMulU24() const {
171  return HasMulU24;
172  }
173 
174  bool hasInv2PiInlineImm() const {
175  return HasInv2PiInlineImm;
176  }
177 
178  bool hasFminFmaxLegacy() const {
179  return HasFminFmaxLegacy;
180  }
181 
182  bool hasTrigReducedRange() const {
183  return HasTrigReducedRange;
184  }
185 
186  bool isPromoteAllocaEnabled() const {
187  return EnablePromoteAlloca;
188  }
189 
190  unsigned getWavefrontSize() const {
191  return WavefrontSize;
192  }
193 
194  int getLocalMemorySize() const {
195  return LocalMemorySize;
196  }
197 
198  unsigned getAlignmentForImplicitArgPtr() const {
     // Alignment is 8 when targeting the AMDHSA OS and 4 for every other OS
     // (units presumably bytes).
199  return !isAmdHsaOS() ? 4 : 8;
200  }
201 
202  /// \returns Byte offset, measured from the start of the input buffer, of the
203  /// first explicit kernel argument.
204  unsigned getExplicitKernelArgOffset(const Function &F) const {
     // Explicit arguments begin at offset 0 for HSA and Mesa kernels, and at
     // offset 36 in every other case.
205  return !isAmdHsaOrMesa(F) ? 36 : 0;
206  }
207 
208  /// \returns Maximum number of work groups per compute unit supported by the
209  /// subtarget and limited by given \p FlatWorkGroupSize.
210  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
211 
212  /// \returns Minimum flat work group size supported by the subtarget.
213  virtual unsigned getMinFlatWorkGroupSize() const = 0;
214 
215  /// \returns Maximum flat work group size supported by the subtarget.
216  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
217 
218  /// \returns Maximum number of waves per execution unit supported by the
219  /// subtarget and limited by given \p FlatWorkGroupSize.
220  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
221 
222  /// \returns Minimum number of waves per execution unit supported by the
223  /// subtarget.
224  virtual unsigned getMinWavesPerEU() const = 0;
225 
226  unsigned getMaxWavesPerEU() const { return 10; }
227 
228  /// Creates value range metadata on a workitemid.* intrinsic call or load.
229  bool makeLIDRangeMetadata(Instruction *I) const;
230 
231  /// \returns Size in bytes of the arguments passed to a shader or kernel on
232  /// top of the explicit ones the function declares.
233  unsigned getImplicitArgNumBytes(const Function &F) const {
     // Mesa kernels always report 16; anything else is read from the
     // "amdgpu-implicitarg-num-bytes" function attribute, with 0 passed as the
     // final argument (presumably the fallback when the attribute is absent).
236  return isMesaKernel(F)
         ? 16
         : AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
237  }
238  uint64_t getExplicitKernArgSize(const Function &F,
239  unsigned &MaxAlign) const;
240  unsigned getKernArgSegmentSize(const Function &F,
241  unsigned &MaxAlign) const;
242 
243  virtual ~AMDGPUSubtarget() = default; // Virtual: this is an abstract base.
244 };
245 
247  public AMDGPUSubtarget {
248 public:
250  TrapHandlerAbiNone = 0,
251  TrapHandlerAbiHsa = 1
252  };
253 
254  enum TrapID {
255  TrapIDHardwareReserved = 0,
256  TrapIDHSADebugTrap = 1,
257  TrapIDLLVMTrap = 2,
258  TrapIDLLVMDebugTrap = 3,
259  TrapIDDebugBreakpoint = 7,
260  TrapIDDebugReserved8 = 8,
261  TrapIDDebugReservedFE = 0xfe,
262  TrapIDDebugReservedFF = 0xff
263  };
264 
266  LLVMTrapHandlerRegValue = 1
267  };
268 
269 private:
270  /// GlobalISel related APIs.
271  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
272  std::unique_ptr<InstructionSelector> InstSelector;
273  std::unique_ptr<LegalizerInfo> Legalizer;
274  std::unique_ptr<RegisterBankInfo> RegBankInfo;
275 
276 protected:
277  // Basic subtarget description.
279  unsigned Gen;
283 
284  // Possibly statically set by tablegen, but may want to be overridden.
287 
288  // Dynamically set bits that enable features.
300 
301  // Used as options.
307  bool DumpCode;
308 
309  // Subtarget properties statically set by tablegen
310  bool FP64;
311  bool FMA;
312  bool MIMG_R128;
313  bool IsGCN;
315  bool CIInsts;
316  bool GFX8Insts;
317  bool GFX9Insts;
324  bool HasMovrel;
333  bool HasDPP;
334  bool HasDPP8;
350  bool HasVscnt;
362  bool CaymanISA;
363  bool CFALUBug;
368 
378 
379  // Dummy feature to use for assembler in tablegen.
381 
383 private:
384  SIInstrInfo InstrInfo;
385  SITargetLowering TLInfo;
386  SIFrameLowering FrameLowering;
387 
388  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
389  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
390 
391 public:
392  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
393  const GCNTargetMachine &TM);
394  ~GCNSubtarget() override;
395 
396  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
397  StringRef GPU, StringRef FS);
398 
399  const SIInstrInfo *getInstrInfo() const override {
400  return &InstrInfo;
401  }
402 
403  const SIFrameLowering *getFrameLowering() const override {
404  return &FrameLowering;
405  }
406 
407  const SITargetLowering *getTargetLowering() const override {
408  return &TLInfo;
409  }
410 
411  const SIRegisterInfo *getRegisterInfo() const override {
412  return &InstrInfo.getRegisterInfo();
413  }
414 
415  const CallLowering *getCallLowering() const override {
416  return CallLoweringInfo.get();
417  }
418 
419  const InstructionSelector *getInstructionSelector() const override {
420  return InstSelector.get();
421  }
422 
423  const LegalizerInfo *getLegalizerInfo() const override {
424  return Legalizer.get();
425  }
426 
427  const RegisterBankInfo *getRegBankInfo() const override {
428  return RegBankInfo.get();
429  }
430 
431  // Nothing implemented, just prevent crashes on use.
432  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
433  return &TSInfo;
434  }
435 
436  const InstrItineraryData *getInstrItineraryData() const override {
437  return &InstrItins;
438  }
439 
440  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
441 
443  return (Generation)Gen;
444  }
445 
446  unsigned getWavefrontSizeLog2() const {
447  return Log2_32(WavefrontSize);
448  }
449 
450  /// Return the number of high bits known to be zero for a frame index.
452  return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
453  }
454 
455  int getLDSBankCount() const {
456  return LDSBankCount;
457  }
458 
459  unsigned getMaxPrivateElementSize() const {
460  return MaxPrivateElementSize;
461  }
462 
463  unsigned getConstantBusLimit(unsigned Opcode) const;
464 
465  bool hasIntClamp() const {
466  return HasIntClamp;
467  }
468 
469  bool hasFP64() const {
470  return FP64;
471  }
472 
473  bool hasMIMG_R128() const {
474  return MIMG_R128;
475  }
476 
477  bool hasHWFP64() const {
478  return FP64;
479  }
480 
481  bool hasFastFMAF32() const {
482  return FastFMAF32;
483  }
484 
485  bool hasHalfRate64Ops() const {
486  return HalfRate64Ops;
487  }
488 
489  bool hasAddr64() const {
     // Reported for every generation ordered before VOLCANIC_ISLANDS.
490  return getGeneration() < VOLCANIC_ISLANDS;
491  }
492 
493  // Return true if the target only has the reverse operand versions of VALU
494  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
495  bool hasOnlyRevVALUShifts() const {
496  return getGeneration() >= VOLCANIC_ISLANDS;
497  }
498 
499  bool hasBFE() const {
500  return true;
501  }
502 
503  bool hasBFI() const {
504  return true;
505  }
506 
507  bool hasBFM() const {
508  return hasBFE();
509  }
510 
511  bool hasBCNT(unsigned Size) const {
512  return true;
513  }
514 
515  bool hasFFBL() const {
516  return true;
517  }
518 
519  bool hasFFBH() const {
520  return true;
521  }
522 
523  bool hasMed3_16() const {
524  return getGeneration() >= AMDGPUSubtarget::GFX9;
525  }
526 
527  bool hasMin3Max3_16() const {
528  return getGeneration() >= AMDGPUSubtarget::GFX9;
529  }
530 
531  bool hasFmaMixInsts() const {
532  return HasFmaMixInsts;
533  }
534 
535  bool hasCARRY() const {
536  return true;
537  }
538 
539  bool hasFMA() const {
540  return FMA;
541  }
542 
543  bool hasSwap() const {
544  return GFX9Insts;
545  }
546 
548  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
549  }
550 
551  /// True if the offset field of DS instructions works as expected. On SI, the
552  /// offset uses a 16-bit adder and does not always wrap properly.
553  bool hasUsableDSOffset() const {
554  return getGeneration() >= SEA_ISLANDS;
555  }
556 
558  return EnableUnsafeDSOffsetFolding;
559  }
560 
561  /// Condition output from div_scale is usable.
563  return getGeneration() != SOUTHERN_ISLANDS;
564  }
565 
566  /// Extra wait hazard is needed in some cases before
567  /// s_cbranch_vccnz/s_cbranch_vccz.
568  bool hasReadVCCZBug() const {
569  return getGeneration() <= SEA_ISLANDS;
570  }
571 
572  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
573  /// was written by a VALU instruction.
575  return getGeneration() == SOUTHERN_ISLANDS;
576  }
577 
578  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
579  /// SGPR was written by a VALU Instruction.
581  return getGeneration() >= VOLCANIC_ISLANDS;
582  }
583 
584  bool hasRFEHazards() const {
585  return getGeneration() >= VOLCANIC_ISLANDS;
586  }
587 
588  /// \returns Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32:
     /// 1 up to and including SEA_ISLANDS, 2 on every later generation.
589  unsigned getSetRegWaitStates() const {
590  return getGeneration() > SEA_ISLANDS ? 2 : 1;
591  }
592 
593  bool dumpCode() const {
594  return DumpCode;
595  }
596 
597  /// Return the amount of LDS that can be used that will not restrict the
598  /// occupancy lower than WaveCount.
599  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
600  const Function &) const;
601 
602  bool hasFP16Denormals() const {
603  return FP64FP16Denormals;
604  }
605 
606  bool hasFP64Denormals() const {
607  return FP64FP16Denormals;
608  }
609 
611  return getGeneration() >= AMDGPUSubtarget::GFX9;
612  }
613 
614  bool useFlatForGlobal() const {
615  return FlatForGlobal;
616  }
617 
618  /// \returns If target supports ds_read/write_b128 and user enables generation
619  /// of ds_read/write_b128.
620  bool useDS128() const {
621  return CIInsts && EnableDS128;
622  }
623 
624  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
625  bool haveRoundOpsF64() const {
626  return CIInsts;
627  }
628 
629  /// \returns If MUBUF instructions always perform range checking, even for
630  /// buffer resources used for private memory access.
632  return getGeneration() < AMDGPUSubtarget::GFX9;
633  }
634 
635  /// \returns If target requires PRT Strict NULL support (zero result registers
636  /// for sparse texture support).
637  bool usePRTStrictNull() const {
638  return EnablePRTStrictNull;
639  }
640 
642  return AutoWaitcntBeforeBarrier;
643  }
644 
645  bool hasCodeObjectV3() const {
646  // FIXME: Code object v3 support still needs to be added for mesa and pal;
     // until then only the AMDHSA OS can report it, everything else is false.
647  return isAmdHsaOS() && CodeObjectV3;
648  }
649 
651  return UnalignedBufferAccess;
652  }
653 
655  return UnalignedScratchAccess;
656  }
657 
658  bool hasApertureRegs() const {
659  return HasApertureRegs;
660  }
661 
662  bool isTrapHandlerEnabled() const {
663  return TrapHandler;
664  }
665 
666  bool isXNACKEnabled() const {
667  return EnableXNACK;
668  }
669 
670  bool isCuModeEnabled() const {
671  return EnableCuMode;
672  }
673 
674  bool hasFlatAddressSpace() const {
675  return FlatAddressSpace;
676  }
677 
678  bool hasFlatScrRegister() const {
679  return hasFlatAddressSpace();
680  }
681 
682  bool hasFlatInstOffsets() const {
683  return FlatInstOffsets;
684  }
685 
686  bool hasFlatGlobalInsts() const {
687  return FlatGlobalInsts;
688  }
689 
690  bool hasFlatScratchInsts() const {
691  return FlatScratchInsts;
692  }
693 
695  return ScalarFlatScratchInsts;
696  }
697 
698  bool hasFlatSegmentOffsetBug() const {
699  return HasFlatSegmentOffsetBug;
700  }
701 
703  return getGeneration() > GFX9;
704  }
705 
706  bool hasD16LoadStore() const {
707  return getGeneration() >= GFX9;
708  }
709 
710  bool d16PreservesUnusedBits() const {
711  return hasD16LoadStore() && !isSRAMECCEnabled();
712  }
713 
714  bool hasD16Images() const {
715  return getGeneration() >= VOLCANIC_ISLANDS;
716  }
717 
718  /// Return if most LDS instructions have an m0 use that requires m0 to be
719  /// initialized.
720  bool ldsRequiresM0Init() const {
721  return getGeneration() < GFX9;
722  }
723 
724  // True if the hardware rewinds and replays GWS operations if a wave is
725  // preempted.
726  //
727  // If this is false, a GWS operation requires testing if a nack set the
728  // MEM_VIOL bit, and repeating if so.
729  bool hasGWSAutoReplay() const {
730  return getGeneration() >= GFX9;
731  }
732 
733  /// \returns if target has ds_gws_sema_release_all instruction.
734  bool hasGWSSemaReleaseAll() const {
735  return CIInsts;
736  }
737 
738  bool hasAddNoCarry() const {
739  return AddNoCarryInsts;
740  }
741 
742  bool hasUnpackedD16VMem() const {
743  return HasUnpackedD16VMem;
744  }
745 
746  // Covers VS/PS/CS graphics shaders
747  bool isMesaGfxShader(const Function &F) const {
748  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
749  }
750 
751  bool hasMad64_32() const {
752  return getGeneration() >= SEA_ISLANDS;
753  }
754 
755  bool hasSDWAOmod() const {
756  return HasSDWAOmod;
757  }
758 
759  bool hasSDWAScalar() const {
760  return HasSDWAScalar;
761  }
762 
763  bool hasSDWASdst() const {
764  return HasSDWASdst;
765  }
766 
767  bool hasSDWAMac() const {
768  return HasSDWAMac;
769  }
770 
771  bool hasSDWAOutModsVOPC() const {
772  return HasSDWAOutModsVOPC;
773  }
774 
775  bool hasDLInsts() const {
776  return HasDLInsts;
777  }
778 
779  bool hasDot1Insts() const {
780  return HasDot1Insts;
781  }
782 
783  bool hasDot2Insts() const {
784  return HasDot2Insts;
785  }
786 
787  bool hasDot3Insts() const {
788  return HasDot3Insts;
789  }
790 
791  bool hasDot4Insts() const {
792  return HasDot4Insts;
793  }
794 
795  bool hasDot5Insts() const {
796  return HasDot5Insts;
797  }
798 
799  bool hasDot6Insts() const {
800  return HasDot6Insts;
801  }
802 
803  bool hasMAIInsts() const {
804  return HasMAIInsts;
805  }
806 
807  bool hasPkFmacF16Inst() const {
808  return HasPkFmacF16Inst;
809  }
810 
811  bool hasAtomicFaddInsts() const {
812  return HasAtomicFaddInsts;
813  }
814 
815  bool isSRAMECCEnabled() const {
816  return EnableSRAMECC;
817  }
818 
819  bool hasNoSdstCMPX() const {
820  return HasNoSdstCMPX;
821  }
822 
823  bool hasVscnt() const {
824  return HasVscnt;
825  }
826 
827  bool hasRegisterBanking() const {
828  return HasRegisterBanking;
829  }
830 
831  bool hasVOP3Literal() const {
832  return HasVOP3Literal;
833  }
834 
835  bool hasNoDataDepHazard() const {
836  return HasNoDataDepHazard;
837  }
838 
840  return getGeneration() < SEA_ISLANDS;
841  }
842 
843  // Scratch is allocated in 256 dword per wave blocks for the entire
844  // wavefront. When viewed from the perspective of an arbitrary workitem, this
845  // is 4-byte aligned.
846  //
847  // Only 4-byte alignment is really needed to access anything. Transformations
848  // on the pointer value itself may rely on the alignment / known low bits of
849  // the pointer. Set this to something above the minimum to avoid needing
850  // dynamic realignment in common cases.
851  unsigned getStackAlignment() const {
852  return 16;
853  }
854 
855  bool enableMachineScheduler() const override {
856  return true;
857  }
858 
859  bool enableSubRegLiveness() const override {
860  return true;
861  }
862 
863  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
865 
866  /// \returns Number of execution units per compute unit supported by the
867  /// subtarget.
868  unsigned getEUsPerCU() const {
869  return AMDGPU::IsaInfo::getEUsPerCU(this);
870  }
871 
872  /// \returns Maximum number of waves per compute unit supported by the
873  /// subtarget without any kind of limitation.
874  unsigned getMaxWavesPerCU() const {
876  }
877 
878  /// \returns Maximum number of waves per compute unit supported by the
879  /// subtarget and limited by given \p FlatWorkGroupSize.
880  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
881  return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
882  }
883 
884  /// \returns Maximum number of waves per execution unit supported by the
885  /// subtarget without any kind of limitation.
886  unsigned getMaxWavesPerEU() const {
888  }
889 
890  /// \returns Number of waves per work group supported by the subtarget and
891  /// limited by given \p FlatWorkGroupSize.
892  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
893  return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
894  }
895 
896  // static wrappers
897  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
898 
899  // XXX - Why is this here if it isn't in the default pass set?
900  bool enableEarlyIfConversion() const override {
901  return true;
902  }
903 
904  void overrideSchedPolicy(MachineSchedPolicy &Policy,
905  unsigned NumRegionInstrs) const override;
906 
907  unsigned getMaxNumUserSGPRs() const {
908  return 16;
909  }
910 
911  bool hasSMemRealTime() const {
912  return HasSMemRealTime;
913  }
914 
915  bool hasMovrel() const {
916  return HasMovrel;
917  }
918 
919  bool hasVGPRIndexMode() const {
920  return HasVGPRIndexMode;
921  }
922 
923  bool useVGPRIndexMode(bool UserEnable) const {
924  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
925  }
926 
927  bool hasScalarCompareEq64() const {
928  return getGeneration() >= VOLCANIC_ISLANDS;
929  }
930 
931  bool hasScalarStores() const {
932  return HasScalarStores;
933  }
934 
935  bool hasScalarAtomics() const {
936  return HasScalarAtomics;
937  }
938 
939  bool hasLDSFPAtomics() const {
940  return GFX8Insts;
941  }
942 
943  bool hasDPP() const {
944  return HasDPP;
945  }
946 
947  bool hasDPP8() const {
948  return HasDPP8;
949  }
950 
951  bool hasR128A16() const {
952  return HasR128A16;
953  }
954 
955  bool hasOffset3fBug() const {
956  return HasOffset3fBug;
957  }
958 
959  bool hasNSAEncoding() const {
960  return HasNSAEncoding;
961  }
962 
963  bool hasMadF16() const;
964 
965  bool enableSIScheduler() const {
966  return EnableSIScheduler;
967  }
968 
969  bool loadStoreOptEnabled() const {
970  return EnableLoadStoreOpt;
971  }
972 
973  bool hasSGPRInitBug() const {
974  return SGPRInitBug;
975  }
976 
977  bool has12DWordStoreHazard() const {
978  return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
979  }
980 
981  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
982  bool hasDwordx3LoadStores() const {
983  return CIInsts;
984  }
985 
986  bool hasSMovFedHazard() const {
987  return getGeneration() == AMDGPUSubtarget::GFX9;
988  }
989 
991  return getGeneration() == AMDGPUSubtarget::GFX9;
992  }
993 
994  bool hasReadM0SendMsgHazard() const {
     // The hazard is reported for the closed generation range
     // [VOLCANIC_ISLANDS, GFX9].
995  const auto CurGen = getGeneration();
996  return VOLCANIC_ISLANDS <= CurGen && CurGen <= GFX9;
997  }
998 
999  bool hasVcmpxPermlaneHazard() const {
1000  return HasVcmpxPermlaneHazard;
1001  }
1002 
1004  return HasVMEMtoScalarWriteHazard;
1005  }
1006 
1008  return HasSMEMtoVectorWriteHazard;
1009  }
1010 
1011  bool hasLDSMisalignedBug() const {
1012  return LDSMisalignedBug && !EnableCuMode;
1013  }
1014 
1015  bool hasInstFwdPrefetchBug() const {
1016  return HasInstFwdPrefetchBug;
1017  }
1018 
1019  bool hasVcmpxExecWARHazard() const {
1020  return HasVcmpxExecWARHazard;
1021  }
1022 
1024  return HasLdsBranchVmemWARHazard;
1025  }
1026 
1027  bool hasNSAtoVMEMBug() const {
1028  return HasNSAtoVMEMBug;
1029  }
1030 
1031  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1032  /// SGPRs
1033  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1034 
1035  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1036  /// VGPRs
1037  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1038 
1039  /// \returns true if the flat_scratch register should be initialized with the
1040  /// pointer to the wave's scratch memory rather than a size and offset.
1041  bool flatScratchIsPointer() const {
1042  return getGeneration() >= AMDGPUSubtarget::GFX9;
1043  }
1044 
1045  /// \returns true if the machine has merged shaders in which s0-s7 are
1046  /// reserved by the hardware and user SGPRs start at s8
1047  bool hasMergedShaders() const {
1048  return getGeneration() >= GFX9;
1049  }
1050 
1051  /// \returns SGPR allocation granularity supported by the subtarget.
1052  unsigned getSGPRAllocGranule() const {
1054  }
1055 
1056  /// \returns SGPR encoding granularity supported by the subtarget.
1057  unsigned getSGPREncodingGranule() const {
1059  }
1060 
1061  /// \returns Total number of SGPRs supported by the subtarget.
1062  unsigned getTotalNumSGPRs() const {
1063  return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1064  }
1065 
1066  /// \returns Addressable number of SGPRs supported by the subtarget.
1067  unsigned getAddressableNumSGPRs() const {
1069  }
1070 
1071  /// \returns Minimum number of SGPRs that meets the given number of waves per
1072  /// execution unit requirement supported by the subtarget.
1073  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1074  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1075  }
1076 
1077  /// \returns Maximum number of SGPRs that meets the given number of waves per
1078  /// execution unit requirement supported by the subtarget.
1079  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1080  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1081  }
1082 
1083  /// \returns Reserved number of SGPRs for given function \p MF.
1084  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1085 
1086  /// \returns Maximum number of SGPRs that meets number of waves per execution
1087  /// unit requirement for function \p MF, or number of SGPRs explicitly
1088  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1089  ///
1090  /// \returns Value that meets number of waves per execution unit requirement
1091  /// if explicitly requested value cannot be converted to integer, violates
1092  /// subtarget's specifications, or does not meet number of waves per execution
1093  /// unit requirement.
1094  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1095 
1096  /// \returns VGPR allocation granularity supported by the subtarget.
1097  unsigned getVGPRAllocGranule() const {
1099  }
1100 
1101  /// \returns VGPR encoding granularity supported by the subtarget.
1102  unsigned getVGPREncodingGranule() const {
1104  }
1105 
1106  /// \returns Total number of VGPRs supported by the subtarget.
1107  unsigned getTotalNumVGPRs() const {
1108  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1109  }
1110 
1111  /// \returns Addressable number of VGPRs supported by the subtarget.
1112  unsigned getAddressableNumVGPRs() const {
1114  }
1115 
1116  /// \returns Minimum number of VGPRs that meets given number of waves per
1117  /// execution unit requirement supported by the subtarget.
1118  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1119  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1120  }
1121 
1122  /// \returns Maximum number of VGPRs that meets given number of waves per
1123  /// execution unit requirement supported by the subtarget.
1124  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1125  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1126  }
1127 
1128  /// \returns Maximum number of VGPRs that meets number of waves per execution
1129  /// unit requirement for function \p MF, or number of VGPRs explicitly
1130  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1131  ///
1132  /// \returns Value that meets number of waves per execution unit requirement
1133  /// if explicitly requested value cannot be converted to integer, violates
1134  /// subtarget's specifications, or does not meet number of waves per execution
1135  /// unit requirement.
1136  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1137 
1138  void getPostRAMutations(
1139  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1140  const override;
1141 
1142  bool isWave32() const {
1143  return WavefrontSize == 32;
1144  }
1145 
1147  return getRegisterInfo()->getBoolRC();
1148  }
1149 
1150  /// \returns Maximum number of work groups per compute unit supported by the
1151  /// subtarget and limited by given \p FlatWorkGroupSize.
1152  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1153  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1154  }
1155 
1156  /// \returns Minimum flat work group size supported by the subtarget.
1157  unsigned getMinFlatWorkGroupSize() const override {
1159  }
1160 
1161  /// \returns Maximum flat work group size supported by the subtarget.
1162  unsigned getMaxFlatWorkGroupSize() const override {
1164  }
1165 
1166  /// \returns Maximum number of waves per execution unit supported by the
1167  /// subtarget and limited by given \p FlatWorkGroupSize.
1168  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1169  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1170  }
1171 
1172  /// \returns Minimum number of waves per execution unit supported by the
1173  /// subtarget.
1174  unsigned getMinWavesPerEU() const override {
1175  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1176  }
1177 };
1178 
1180  public AMDGPUSubtarget {
1181 private:
1182  R600InstrInfo InstrInfo;
1183  R600FrameLowering FrameLowering;
1184  bool FMA;
1185  bool CaymanISA;
1186  bool CFALUBug;
1187  bool HasVertexCache;
1188  bool R600ALUInst;
1189  bool FP64;
1190  short TexVTXClauseSize;
1191  Generation Gen;
1192  R600TargetLowering TLInfo;
1193  InstrItineraryData InstrItins;
1194  SelectionDAGTargetInfo TSInfo;
1195 
1196 public:
1197  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
1198  const TargetMachine &TM);
1199 
1200  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
1201 
1202  const R600FrameLowering *getFrameLowering() const override {
1203  return &FrameLowering;
1204  }
1205 
1206  const R600TargetLowering *getTargetLowering() const override {
1207  return &TLInfo;
1208  }
1209 
1210  const R600RegisterInfo *getRegisterInfo() const override {
1211  return &InstrInfo.getRegisterInfo();
1212  }
1213 
1214  const InstrItineraryData *getInstrItineraryData() const override {
1215  return &InstrItins;
1216  }
1217 
1218  // Nothing implemented, just prevent crashes on use.
1220  return &TSInfo;
1221  }
1222 
1223  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1224 
1226  return Gen;
1227  }
1228 
1229  unsigned getStackAlignment() const {
1230  return 4;
1231  }
1232 
1233  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1234  StringRef GPU, StringRef FS);
1235 
1236  bool hasBFE() const {
1237  return (getGeneration() >= EVERGREEN);
1238  }
1239 
1240  bool hasBFI() const {
1241  return (getGeneration() >= EVERGREEN);
1242  }
1243 
1244  bool hasBCNT(unsigned Size) const {
      // Only a Size of 32 is ever reported, and only from EVERGREEN onwards.
1245  return Size == 32 && getGeneration() >= EVERGREEN;
1249  }
1250 
1251  bool hasBORROW() const {
1252  return (getGeneration() >= EVERGREEN);
1253  }
1254 
1255  bool hasCARRY() const {
1256  return (getGeneration() >= EVERGREEN);
1257  }
1258 
1259  bool hasCaymanISA() const {
1260  return CaymanISA;
1261  }
1262 
1263  bool hasFFBL() const {
1264  return (getGeneration() >= EVERGREEN);
1265  }
1266 
1267  bool hasFFBH() const {
1268  return (getGeneration() >= EVERGREEN);
1269  }
1270 
1271  bool hasFMA() const { return FMA; }
1272 
1273  bool hasCFAluBug() const { return CFALUBug; }
1274 
1275  bool hasVertexCache() const { return HasVertexCache; }
1276 
1277  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1278 
1279  bool enableMachineScheduler() const override {
1280  return true;
1281  }
1282 
1283  bool enableSubRegLiveness() const override {
1284  return true;
1285  }
1286 
1287  /// \returns Maximum number of work groups per compute unit supported by the
1288  /// subtarget and limited by given \p FlatWorkGroupSize.
1289  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1290  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1291  }
1292 
1293  /// \returns Minimum flat work group size supported by the subtarget.
1294  unsigned getMinFlatWorkGroupSize() const override {
1296  }
1297 
1298  /// \returns Maximum flat work group size supported by the subtarget.
1299  unsigned getMaxFlatWorkGroupSize() const override {
1301  }
1302 
1303  /// \returns Maximum number of waves per execution unit supported by the
1304  /// subtarget and limited by given \p FlatWorkGroupSize.
1305  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1306  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1307  }
1308 
1309  /// \returns Minimum number of waves per execution unit supported by the
1310  /// subtarget.
1311  unsigned getMinWavesPerEU() const override {
1312  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1313  }
1314 };
1315 
1316 } // end namespace llvm
1317 
1318 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasVscnt() const
bool hasSDWAOmod() const
bool hasLDSMisalignedBug() const
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool hasVOP3Literal() const
bool hasPkFmacF16Inst() const
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
bool hasApertureRegs() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
bool hasFlatScrRegister() const
bool isMesaKernel(const Function &F) const
unsigned getMinFlatWorkGroupSize() const override
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:305
bool isPromoteAllocaEnabled() const
bool d16PreservesUnusedBits() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getSGPRAllocGranule() const
bool hasNSAtoVMEMBug() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasAtomicFaddInsts() const
bool hasTrigReducedRange() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool hasVcmpxPermlaneHazard() const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
virtual unsigned getMinWavesPerEU() const =0
F(f)
InstrItineraryData InstrItins
unsigned getMaxWavesPerEU() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasInstFwdPrefetchBug() const
bool hasFastFMAF32() const
Generation getGeneration() const
bool hasFlatSegmentOffsetBug() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:171
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasFP64Denormals() const
Holds all the information related to register banks.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
bool hasDwordx3LoadStores() const
bool hasIntClamp() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasLdsBranchVmemWARHazard() const
bool hasD16Images() const
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
bool isTrapHandlerEnabled() const
bool hasDot4Insts() const
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasSMemRealTime() const
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
const TargetRegisterClass * getBoolRC() const
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasHalfRate64Ops() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
bool useFlatForGlobal() const
unsigned getAddressableNumSGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinWavesPerEU() const override
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
Itinerary data supplied by a subtarget to be used by a target.
bool hasVMEMtoScalarWriteHazard() const
bool hasAddNoCarry() const
bool hasDot3Insts() const
const CallLowering * getCallLowering() const override
virtual unsigned getMinFlatWorkGroupSize() const =0
bool hasNoDataDepHazard() const
bool dumpCode() const
bool hasDot6Insts() const
bool isSRAMECCEnabled() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
bool hasDot2Insts() const
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMaxWavesPerCU() const
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasCFAluBug() const
unsigned getStackAlignment() const
bool hasFminFmaxLegacy() const
bool hasDLInsts() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasNSAEncoding() const
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasSwap() const
bool hasMovrel() const
unsigned MaxPrivateElementSize
bool usePRTStrictNull() const
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
bool isCuModeEnabled() const
bool hasLDSFPAtomics() const
bool hasSMEMtoVectorWriteHazard() const
const R600InstrInfo * getInstrInfo() const override
bool hasRegisterBanking() const
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:71
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
bool hasGWSAutoReplay() const
bool hasOnlyRevVALUShifts() const
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:188
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
bool hasScalarAtomics() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasOffset3fBug() const
bool hasVcmpxExecWARHazard() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
bool hasGWSSemaReleaseAll() const
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool enableSIScheduler() const
bool hasRFEHazards() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
bool hasMadMixInsts() const
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
bool hasScalarFlatScratchInsts() const
unsigned getAlignmentForImplicitArgPtr() const
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasMed3_16() const
int getLDSBankCount() const
bool hasBCNT(unsigned Size) const
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI)
const InstructionSelector * getInstructionSelector() const override
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool hasDot5Insts() const
bool enableSubRegLiveness() const override
bool hasNoSdstCMPX() const
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
R600 DAG Lowering interface definition.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(const Triple &TT)
unsigned getTotalNumVGPRs() const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:46
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getStackAlignment() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool hasSDWAScalar() const
const InstrItineraryData * getInstrItineraryData() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return whether most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
bool hasDot1Insts() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:326
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasMAIInsts() const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool hasDPP8() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasR128A16() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override