LLVM  9.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "AMDGPU.h"
18 #include "AMDGPUCallLowering.h"
19 #include "R600FrameLowering.h"
20 #include "R600ISelLowering.h"
21 #include "R600InstrInfo.h"
22 #include "SIFrameLowering.h"
23 #include "SIISelLowering.h"
24 #include "SIInstrInfo.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/ADT/Triple.h"
34 #include <cassert>
35 #include <cstdint>
36 #include <memory>
37 #include <utility>
38 
39 #define GET_SUBTARGETINFO_HEADER
40 #include "AMDGPUGenSubtargetInfo.inc"
41 #define GET_SUBTARGETINFO_HEADER
42 #include "R600GenSubtargetInfo.inc"
43 
44 namespace llvm {
45 
46 class StringRef;
47 
49 public:
50  enum Generation {
51  R600 = 0,
52  R700 = 1,
53  EVERGREEN = 2,
58  GFX9 = 7,
59  GFX10 = 8
60  };
61 
62 private:
63  Triple TargetTriple;
64 
65 protected:
70  bool HasSDWA;
72  bool HasMulI24;
73  bool HasMulU24;
79  unsigned WavefrontSize;
80 
81 public:
82  AMDGPUSubtarget(const Triple &TT);
83 
84  static const AMDGPUSubtarget &get(const MachineFunction &MF);
85  static const AMDGPUSubtarget &get(const TargetMachine &TM,
86  const Function &F);
87 
88  /// \returns Default flat work group size range for a calling convention.
89  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
90 
91  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
92  /// for function \p F, or minimum/maximum flat work group sizes explicitly
93  /// requested using "amdgpu-flat-work-group-size" attribute attached to
94  /// function \p F.
95  ///
96  /// \returns Subtarget's default values if explicitly requested values cannot
97  /// be converted to integer, or violate subtarget's specifications.
98  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
99 
100  /// \returns Subtarget's default pair of minimum/maximum number of waves per
101  /// execution unit for function \p F, or minimum/maximum number of waves per
102  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
103  /// attached to function \p F.
104  ///
105  /// \returns Subtarget's default values if explicitly requested values cannot
106  /// be converted to integer, violate subtarget's specifications, or are not
107  /// compatible with minimum/maximum number of waves limited by flat work group
108  /// size, register usage, and/or lds usage.
109  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
110 
111  /// Return the amount of LDS that can be used that will not restrict the
112  /// occupancy lower than WaveCount.
113  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
114  const Function &) const;
115 
116  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount
117  /// if the given LDS memory size is the only constraint.
118  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
119 
120  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
121 
122  bool isAmdHsaOS() const {
123  return TargetTriple.getOS() == Triple::AMDHSA;
124  }
125 
126  bool isAmdPalOS() const {
127  return TargetTriple.getOS() == Triple::AMDPAL;
128  }
129 
130  bool isMesa3DOS() const {
131  return TargetTriple.getOS() == Triple::Mesa3D;
132  }
133 
134  bool isMesaKernel(const Function &F) const {
135  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
136  }
137 
138  bool isAmdHsaOrMesa(const Function &F) const {
139  return isAmdHsaOS() || isMesaKernel(F);
140  }
141 
142  bool has16BitInsts() const {
143  return Has16BitInsts;
144  }
145 
146  bool hasMadMixInsts() const {
147  return HasMadMixInsts;
148  }
149 
150  bool hasFP32Denormals() const {
151  return FP32Denormals;
152  }
153 
154  bool hasFPExceptions() const {
155  return FPExceptions;
156  }
157 
158  bool hasSDWA() const {
159  return HasSDWA;
160  }
161 
162  bool hasVOP3PInsts() const {
163  return HasVOP3PInsts;
164  }
165 
166  bool hasMulI24() const {
167  return HasMulI24;
168  }
169 
170  bool hasMulU24() const {
171  return HasMulU24;
172  }
173 
174  bool hasInv2PiInlineImm() const {
175  return HasInv2PiInlineImm;
176  }
177 
178  bool hasFminFmaxLegacy() const {
179  return HasFminFmaxLegacy;
180  }
181 
182  bool hasTrigReducedRange() const {
183  return HasTrigReducedRange;
184  }
185 
186  bool isPromoteAllocaEnabled() const {
187  return EnablePromoteAlloca;
188  }
189 
190  unsigned getWavefrontSize() const {
191  return WavefrontSize;
192  }
193 
194  int getLocalMemorySize() const {
195  return LocalMemorySize;
196  }
197 
198  unsigned getAlignmentForImplicitArgPtr() const {
199  return isAmdHsaOS() ? 8 : 4; // 8 when the target OS is AMDHSA, otherwise 4.
200  }
201 
202  /// Returns the offset in bytes from the start of the input buffer
203  /// of the first explicit kernel argument.
204  unsigned getExplicitKernelArgOffset(const Function &F) const {
205  return isAmdHsaOrMesa(F) ? 0 : 36;
206  }
207 
208  /// \returns Maximum number of work groups per compute unit supported by the
209  /// subtarget and limited by given \p FlatWorkGroupSize.
210  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
211 
212  /// \returns Minimum flat work group size supported by the subtarget.
213  virtual unsigned getMinFlatWorkGroupSize() const = 0;
214 
215  /// \returns Maximum flat work group size supported by the subtarget.
216  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
217 
218  /// \returns Maximum number of waves per execution unit supported by the
219  /// subtarget and limited by given \p FlatWorkGroupSize.
220  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
221 
222  /// \returns Minimum number of waves per execution unit supported by the
223  /// subtarget.
224  virtual unsigned getMinWavesPerEU() const = 0;
225 
226  unsigned getMaxWavesPerEU() const { return 10; } // Non-virtual, parameterless overload: always returns 10.
227 
228  /// Creates value range metadata on a workitemid.* intrinsic call or load.
229  bool makeLIDRangeMetadata(Instruction *I) const;
230 
231  /// \returns Number of bytes of arguments that are passed to a shader or
232  /// kernel in addition to the explicit ones declared for the function.
233  unsigned getImplicitArgNumBytes(const Function &F) const {
234  if (isMesaKernel(F))
235  return 16;
236  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
237  }
238  uint64_t getExplicitKernArgSize(const Function &F,
239  unsigned &MaxAlign) const;
240  unsigned getKernArgSegmentSize(const Function &F,
241  unsigned &MaxAlign) const;
242 
243  virtual ~AMDGPUSubtarget() {}
244 };
245 
247  public AMDGPUSubtarget {
248 public:
250  TrapHandlerAbiNone = 0,
251  TrapHandlerAbiHsa = 1
252  };
253 
254  enum TrapID {
255  TrapIDHardwareReserved = 0,
256  TrapIDHSADebugTrap = 1,
257  TrapIDLLVMTrap = 2,
258  TrapIDLLVMDebugTrap = 3,
259  TrapIDDebugBreakpoint = 7,
260  TrapIDDebugReserved8 = 8,
261  TrapIDDebugReservedFE = 0xfe,
262  TrapIDDebugReservedFF = 0xff
263  };
264 
266  LLVMTrapHandlerRegValue = 1
267  };
268 
269 private:
270  /// GlobalISel related APIs.
271  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
272  std::unique_ptr<InstructionSelector> InstSelector;
273  std::unique_ptr<LegalizerInfo> Legalizer;
274  std::unique_ptr<RegisterBankInfo> RegBankInfo;
275 
276 protected:
277  // Basic subtarget description.
279  unsigned Gen;
283 
284  // Possibly statically set by tablegen, but may want to be overridden.
287 
288  // Dynamically set bits that enable features.
300 
301  // Used as options.
307  bool DumpCode;
308 
309  // Subtarget properties statically set by tablegen
310  bool FP64;
311  bool FMA;
312  bool MIMG_R128;
313  bool IsGCN;
315  bool CIInsts;
316  bool GFX8Insts;
317  bool GFX9Insts;
324  bool HasMovrel;
333  bool HasDPP;
334  bool HasDPP8;
345  bool HasVscnt;
357  bool CaymanISA;
358  bool CFALUBug;
363 
372 
373  // Dummy feature to use for assembler in tablegen.
375 
377 private:
378  SIInstrInfo InstrInfo;
379  SITargetLowering TLInfo;
380  SIFrameLowering FrameLowering;
381 
382  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
383  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
384 
385 public:
386  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
387  const GCNTargetMachine &TM);
388  ~GCNSubtarget() override;
389 
390  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
391  StringRef GPU, StringRef FS);
392 
393  const SIInstrInfo *getInstrInfo() const override {
394  return &InstrInfo;
395  }
396 
397  const SIFrameLowering *getFrameLowering() const override {
398  return &FrameLowering;
399  }
400 
401  const SITargetLowering *getTargetLowering() const override {
402  return &TLInfo;
403  }
404 
405  const SIRegisterInfo *getRegisterInfo() const override {
406  return &InstrInfo.getRegisterInfo();
407  }
408 
409  const CallLowering *getCallLowering() const override {
410  return CallLoweringInfo.get();
411  }
412 
413  const InstructionSelector *getInstructionSelector() const override {
414  return InstSelector.get();
415  }
416 
417  const LegalizerInfo *getLegalizerInfo() const override {
418  return Legalizer.get();
419  }
420 
421  const RegisterBankInfo *getRegBankInfo() const override {
422  return RegBankInfo.get();
423  }
424 
425  // Nothing implemented, just prevent crashes on use.
426  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
427  return &TSInfo;
428  }
429 
430  const InstrItineraryData *getInstrItineraryData() const override {
431  return &InstrItins;
432  }
433 
434  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
435 
437  return (Generation)Gen;
438  }
439 
440  unsigned getWavefrontSizeLog2() const {
441  return Log2_32(WavefrontSize);
442  }
443 
444  /// Return the number of high bits known to be zero for a frame index.
446  return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
447  }
448 
449  int getLDSBankCount() const {
450  return LDSBankCount;
451  }
452 
453  unsigned getMaxPrivateElementSize() const {
454  return MaxPrivateElementSize;
455  }
456 
457  unsigned getConstantBusLimit(unsigned Opcode) const;
458 
459  bool hasIntClamp() const {
460  return HasIntClamp;
461  }
462 
463  bool hasFP64() const {
464  return FP64;
465  }
466 
467  bool hasMIMG_R128() const {
468  return MIMG_R128;
469  }
470 
471  bool hasHWFP64() const {
472  return FP64;
473  }
474 
475  bool hasFastFMAF32() const {
476  return FastFMAF32;
477  }
478 
479  bool hasHalfRate64Ops() const {
480  return HalfRate64Ops;
481  }
482 
483  bool hasAddr64() const {
484  return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
485  }
486 
487  // Return true if the target only has the reverse operand versions of VALU
488  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
489  bool hasOnlyRevVALUShifts() const {
490  return getGeneration() >= VOLCANIC_ISLANDS;
491  }
492 
493  bool hasBFE() const {
494  return true;
495  }
496 
497  bool hasBFI() const {
498  return true;
499  }
500 
501  bool hasBFM() const {
502  return hasBFE();
503  }
504 
505  bool hasBCNT(unsigned Size) const { // Size is not consulted here.
506  return true;
507  }
508 
509  bool hasFFBL() const {
510  return true;
511  }
512 
513  bool hasFFBH() const {
514  return true;
515  }
516 
517  bool hasMed3_16() const {
518  return getGeneration() >= AMDGPUSubtarget::GFX9;
519  }
520 
521  bool hasMin3Max3_16() const {
522  return getGeneration() >= AMDGPUSubtarget::GFX9;
523  }
524 
525  bool hasFmaMixInsts() const {
526  return HasFmaMixInsts;
527  }
528 
529  bool hasCARRY() const {
530  return true;
531  }
532 
533  bool hasFMA() const {
534  return FMA;
535  }
536 
537  bool hasSwap() const {
538  return GFX9Insts;
539  }
540 
542  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
543  }
544 
545  /// True if the offset field of DS instructions works as expected. On SI, the
546  /// offset uses a 16-bit adder and does not always wrap properly.
547  bool hasUsableDSOffset() const {
548  return getGeneration() >= SEA_ISLANDS;
549  }
550 
552  return EnableUnsafeDSOffsetFolding;
553  }
554 
555  /// Condition output from div_scale is usable.
557  return getGeneration() != SOUTHERN_ISLANDS;
558  }
559 
560  /// Extra wait hazard is needed in some cases before
561  /// s_cbranch_vccnz/s_cbranch_vccz.
562  bool hasReadVCCZBug() const {
563  return getGeneration() <= SEA_ISLANDS;
564  }
565 
566  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
567  /// was written by a VALU instruction.
569  return getGeneration() == SOUTHERN_ISLANDS;
570  }
571 
572  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
573  /// SGPR was written by a VALU Instruction.
575  return getGeneration() >= VOLCANIC_ISLANDS;
576  }
577 
578  bool hasRFEHazards() const {
579  return getGeneration() >= VOLCANIC_ISLANDS;
580  }
581 
582  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
583  unsigned getSetRegWaitStates() const {
584  return getGeneration() <= SEA_ISLANDS ? 1 : 2;
585  }
586 
587  bool dumpCode() const {
588  return DumpCode;
589  }
590 
591  /// Return the amount of LDS that can be used that will not restrict the
592  /// occupancy lower than WaveCount.
593  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
594  const Function &) const;
595 
596  bool hasFP16Denormals() const {
597  return FP64FP16Denormals;
598  }
599 
600  bool hasFP64Denormals() const {
601  return FP64FP16Denormals;
602  }
603 
605  return getGeneration() >= AMDGPUSubtarget::GFX9;
606  }
607 
608  bool useFlatForGlobal() const {
609  return FlatForGlobal;
610  }
611 
612  /// \returns If target supports ds_read/write_b128 and user enables generation
613  /// of ds_read/write_b128.
614  bool useDS128() const {
615  return CIInsts && EnableDS128;
616  }
617 
618  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
619  bool haveRoundOpsF64() const {
620  return CIInsts;
621  }
622 
623  /// \returns If MUBUF instructions always perform range checking, even for
624  /// buffer resources used for private memory access.
626  return getGeneration() < AMDGPUSubtarget::GFX9;
627  }
628 
629  /// \returns If target requires PRT Strict NULL support (zero result registers
630  /// for sparse texture support).
631  bool usePRTStrictNull() const {
632  return EnablePRTStrictNull;
633  }
634 
636  return AutoWaitcntBeforeBarrier;
637  }
638 
639  bool hasCodeObjectV3() const {
640  // FIXME: Need to add code object v3 support for mesa and pal.
641  return isAmdHsaOS() ? CodeObjectV3 : false;
642  }
643 
645  return UnalignedBufferAccess;
646  }
647 
649  return UnalignedScratchAccess;
650  }
651 
652  bool hasApertureRegs() const {
653  return HasApertureRegs;
654  }
655 
656  bool isTrapHandlerEnabled() const {
657  return TrapHandler;
658  }
659 
660  bool isXNACKEnabled() const {
661  return EnableXNACK;
662  }
663 
664  bool isCuModeEnabled() const {
665  return EnableCuMode;
666  }
667 
668  bool hasFlatAddressSpace() const {
669  return FlatAddressSpace;
670  }
671 
672  bool hasFlatScrRegister() const {
673  return hasFlatAddressSpace();
674  }
675 
676  bool hasFlatInstOffsets() const {
677  return FlatInstOffsets;
678  }
679 
680  bool hasFlatGlobalInsts() const {
681  return FlatGlobalInsts;
682  }
683 
684  bool hasFlatScratchInsts() const {
685  return FlatScratchInsts;
686  }
687 
689  return ScalarFlatScratchInsts;
690  }
691 
692  bool hasFlatSegmentOffsetBug() const {
693  return HasFlatSegmentOffsetBug;
694  }
695 
697  return getGeneration() > GFX9;
698  }
699 
700  bool hasD16LoadStore() const {
701  return getGeneration() >= GFX9;
702  }
703 
704  bool d16PreservesUnusedBits() const {
705  return hasD16LoadStore() && !isSRAMECCEnabled();
706  }
707 
708  bool hasD16Images() const {
709  return getGeneration() >= VOLCANIC_ISLANDS;
710  }
711 
712  /// Return true if most LDS instructions have an m0 use that requires m0 to
713  /// be initialized.
714  bool ldsRequiresM0Init() const {
715  return getGeneration() < GFX9;
716  }
717 
718  // True if the hardware rewinds and replays GWS operations if a wave is
719  // preempted.
720  //
721  // If this is false, a GWS operation requires testing if a nack set the
722  // MEM_VIOL bit, and repeating if so.
723  bool hasGWSAutoReplay() const {
724  return getGeneration() >= GFX9;
725  }
726 
727  /// \returns if target has ds_gws_sema_release_all instruction.
728  bool hasGWSSemaReleaseAll() const {
729  return CIInsts;
730  }
731 
732  bool hasAddNoCarry() const {
733  return AddNoCarryInsts;
734  }
735 
736  bool hasUnpackedD16VMem() const {
737  return HasUnpackedD16VMem;
738  }
739 
740  // Covers VS/PS/CS graphics shaders
741  bool isMesaGfxShader(const Function &F) const {
742  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
743  }
744 
745  bool hasMad64_32() const {
746  return getGeneration() >= SEA_ISLANDS;
747  }
748 
749  bool hasSDWAOmod() const {
750  return HasSDWAOmod;
751  }
752 
753  bool hasSDWAScalar() const {
754  return HasSDWAScalar;
755  }
756 
757  bool hasSDWASdst() const {
758  return HasSDWASdst;
759  }
760 
761  bool hasSDWAMac() const {
762  return HasSDWAMac;
763  }
764 
765  bool hasSDWAOutModsVOPC() const {
766  return HasSDWAOutModsVOPC;
767  }
768 
769  bool hasDLInsts() const {
770  return HasDLInsts;
771  }
772 
773  bool hasDot1Insts() const {
774  return HasDot1Insts;
775  }
776 
777  bool hasDot2Insts() const {
778  return HasDot2Insts;
779  }
780 
781  bool hasDot5Insts() const {
782  return HasDot5Insts;
783  }
784 
785  bool hasDot6Insts() const {
786  return HasDot6Insts;
787  }
788 
789  bool isSRAMECCEnabled() const {
790  return EnableSRAMECC;
791  }
792 
793  bool hasNoSdstCMPX() const {
794  return HasNoSdstCMPX;
795  }
796 
797  bool hasVscnt() const {
798  return HasVscnt;
799  }
800 
801  bool hasRegisterBanking() const {
802  return HasRegisterBanking;
803  }
804 
805  bool hasVOP3Literal() const {
806  return HasVOP3Literal;
807  }
808 
809  bool hasNoDataDepHazard() const {
810  return HasNoDataDepHazard;
811  }
812 
814  return getGeneration() < SEA_ISLANDS;
815  }
816 
817  // Scratch is allocated in 256 dword per wave blocks for the entire
818  // wavefront. When viewed from the perspective of an arbitrary workitem, this
819  // is 4-byte aligned.
820  //
821  // Only 4-byte alignment is really needed to access anything. Transformations
822  // on the pointer value itself may rely on the alignment / known low bits of
823  // the pointer. Set this to something above the minimum to avoid needing
824  // dynamic realignment in common cases.
825  unsigned getStackAlignment() const {
826  return 16;
827  }
828 
829  bool enableMachineScheduler() const override {
830  return true;
831  }
832 
833  bool enableSubRegLiveness() const override {
834  return true;
835  }
836 
837  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
839 
840  /// \returns Number of execution units per compute unit supported by the
841  /// subtarget.
842  unsigned getEUsPerCU() const {
843  return AMDGPU::IsaInfo::getEUsPerCU(this);
844  }
845 
846  /// \returns Maximum number of waves per compute unit supported by the
847  /// subtarget without any kind of limitation.
848  unsigned getMaxWavesPerCU() const {
850  }
851 
852  /// \returns Maximum number of waves per compute unit supported by the
853  /// subtarget and limited by given \p FlatWorkGroupSize.
854  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
855  return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
856  }
857 
858  /// \returns Maximum number of waves per execution unit supported by the
859  /// subtarget without any kind of limitation.
860  unsigned getMaxWavesPerEU() const {
862  }
863 
864  /// \returns Number of waves per work group supported by the subtarget and
865  /// limited by given \p FlatWorkGroupSize.
866  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
867  return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
868  }
869 
870  // static wrappers
871  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
872 
873  // XXX - Why is this here if it isn't in the default pass set?
874  bool enableEarlyIfConversion() const override {
875  return true;
876  }
877 
878  void overrideSchedPolicy(MachineSchedPolicy &Policy,
879  unsigned NumRegionInstrs) const override;
880 
881  unsigned getMaxNumUserSGPRs() const {
882  return 16;
883  }
884 
885  bool hasSMemRealTime() const {
886  return HasSMemRealTime;
887  }
888 
889  bool hasMovrel() const {
890  return HasMovrel;
891  }
892 
893  bool hasVGPRIndexMode() const {
894  return HasVGPRIndexMode;
895  }
896 
897  bool useVGPRIndexMode(bool UserEnable) const { // True without movrel; else needs UserEnable and hasVGPRIndexMode().
898  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
899  }
900 
901  bool hasScalarCompareEq64() const {
902  return getGeneration() >= VOLCANIC_ISLANDS;
903  }
904 
905  bool hasScalarStores() const {
906  return HasScalarStores;
907  }
908 
909  bool hasScalarAtomics() const {
910  return HasScalarAtomics;
911  }
912 
913  bool hasLDSFPAtomics() const {
914  return GFX8Insts;
915  }
916 
917  bool hasDPP() const {
918  return HasDPP;
919  }
920 
921  bool hasDPP8() const {
922  return HasDPP8;
923  }
924 
925  bool hasR128A16() const {
926  return HasR128A16;
927  }
928 
929  bool hasNSAEncoding() const {
930  return HasNSAEncoding;
931  }
932 
933  bool hasMadF16() const;
934 
935  bool enableSIScheduler() const {
936  return EnableSIScheduler;
937  }
938 
939  bool loadStoreOptEnabled() const {
940  return EnableLoadStoreOpt;
941  }
942 
943  bool hasSGPRInitBug() const {
944  return SGPRInitBug;
945  }
946 
947  bool has12DWordStoreHazard() const {
948  return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
949  }
950 
951  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
952  bool hasDwordx3LoadStores() const {
953  return CIInsts;
954  }
955 
956  bool hasSMovFedHazard() const {
957  return getGeneration() == AMDGPUSubtarget::GFX9;
958  }
959 
961  return getGeneration() == AMDGPUSubtarget::GFX9;
962  }
963 
964  bool hasReadM0SendMsgHazard() const {
965  return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
966  getGeneration() <= AMDGPUSubtarget::GFX9;
967  }
968 
969  bool hasVcmpxPermlaneHazard() const {
970  return HasVcmpxPermlaneHazard;
971  }
972 
974  return HasVMEMtoScalarWriteHazard;
975  }
976 
978  return HasSMEMtoVectorWriteHazard;
979  }
980 
981  bool hasLDSMisalignedBug() const {
982  return LDSMisalignedBug && !EnableCuMode;
983  }
984 
985  bool hasInstFwdPrefetchBug() const {
986  return HasInstFwdPrefetchBug;
987  }
988 
989  bool hasVcmpxExecWARHazard() const {
990  return HasVcmpxExecWARHazard;
991  }
992 
994  return HasLdsBranchVmemWARHazard;
995  }
996 
997  bool hasNSAtoVMEMBug() const {
998  return HasNSAtoVMEMBug;
999  }
1000 
1001  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1002  /// SGPRs
1003  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1004 
1005  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1006  /// VGPRs
1007  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1008 
1009  /// \returns true if the flat_scratch register should be initialized with the
1010  /// pointer to the wave's scratch memory rather than a size and offset.
1011  bool flatScratchIsPointer() const {
1012  return getGeneration() >= AMDGPUSubtarget::GFX9;
1013  }
1014 
1015  /// \returns true if the machine has merged shaders in which s0-s7 are
1016  /// reserved by the hardware and user SGPRs start at s8
1017  bool hasMergedShaders() const {
1018  return getGeneration() >= GFX9;
1019  }
1020 
1021  /// \returns SGPR allocation granularity supported by the subtarget.
1022  unsigned getSGPRAllocGranule() const {
1024  }
1025 
1026  /// \returns SGPR encoding granularity supported by the subtarget.
1027  unsigned getSGPREncodingGranule() const {
1029  }
1030 
1031  /// \returns Total number of SGPRs supported by the subtarget.
1032  unsigned getTotalNumSGPRs() const {
1033  return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1034  }
1035 
1036  /// \returns Addressable number of SGPRs supported by the subtarget.
1037  unsigned getAddressableNumSGPRs() const {
1039  }
1040 
1041  /// \returns Minimum number of SGPRs that meets the given number of waves per
1042  /// execution unit requirement supported by the subtarget.
1043  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1044  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1045  }
1046 
1047  /// \returns Maximum number of SGPRs that meets the given number of waves per
1048  /// execution unit requirement supported by the subtarget.
1049  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1050  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1051  }
1052 
1053  /// \returns Reserved number of SGPRs for given function \p MF.
1054  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1055 
1056  /// \returns Maximum number of SGPRs that meets number of waves per execution
1057  /// unit requirement for function \p MF, or number of SGPRs explicitly
1058  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1059  ///
1060  /// \returns Value that meets number of waves per execution unit requirement
1061  /// if explicitly requested value cannot be converted to integer, violates
1062  /// subtarget's specifications, or does not meet number of waves per execution
1063  /// unit requirement.
1064  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1065 
1066  /// \returns VGPR allocation granularity supported by the subtarget.
1067  unsigned getVGPRAllocGranule() const {
1069  }
1070 
1071  /// \returns VGPR encoding granularity supported by the subtarget.
1072  unsigned getVGPREncodingGranule() const {
1074  }
1075 
1076  /// \returns Total number of VGPRs supported by the subtarget.
1077  unsigned getTotalNumVGPRs() const {
1078  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1079  }
1080 
1081  /// \returns Addressable number of VGPRs supported by the subtarget.
1082  unsigned getAddressableNumVGPRs() const {
1084  }
1085 
1086  /// \returns Minimum number of VGPRs that meets given number of waves per
1087  /// execution unit requirement supported by the subtarget.
1088  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1089  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1090  }
1091 
1092  /// \returns Maximum number of VGPRs that meets given number of waves per
1093  /// execution unit requirement supported by the subtarget.
1094  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1095  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1096  }
1097 
1098  /// \returns Maximum number of VGPRs that meets number of waves per execution
1099  /// unit requirement for function \p MF, or number of VGPRs explicitly
1100  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1101  ///
1102  /// \returns Value that meets number of waves per execution unit requirement
1103  /// if explicitly requested value cannot be converted to integer, violates
1104  /// subtarget's specifications, or does not meet number of waves per execution
1105  /// unit requirement.
1106  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1107 
1108  void getPostRAMutations(
1109  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1110  const override;
1111 
1112  bool isWave32() const {
1113  return WavefrontSize == 32;
1114  }
1115 
1117  return getRegisterInfo()->getBoolRC();
1118  }
1119 
1120  /// \returns Maximum number of work groups per compute unit supported by the
1121  /// subtarget and limited by given \p FlatWorkGroupSize.
1122  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1123  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1124  }
1125 
1126  /// \returns Minimum flat work group size supported by the subtarget.
1127  unsigned getMinFlatWorkGroupSize() const override {
1129  }
1130 
1131  /// \returns Maximum flat work group size supported by the subtarget.
1132  unsigned getMaxFlatWorkGroupSize() const override {
1134  }
1135 
1136  /// \returns Maximum number of waves per execution unit supported by the
1137  /// subtarget and limited by given \p FlatWorkGroupSize.
1138  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1139  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1140  }
1141 
1142  /// \returns Minimum number of waves per execution unit supported by the
1143  /// subtarget.
1144  unsigned getMinWavesPerEU() const override {
1145  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1146  }
1147 };
1148 
1150  public AMDGPUSubtarget {
1151 private:
1152  R600InstrInfo InstrInfo;
1153  R600FrameLowering FrameLowering;
1154  bool FMA;
1155  bool CaymanISA;
1156  bool CFALUBug;
1157  bool HasVertexCache;
1158  bool R600ALUInst;
1159  bool FP64;
1160  short TexVTXClauseSize;
1161  Generation Gen;
1162  R600TargetLowering TLInfo;
1163  InstrItineraryData InstrItins;
1164  SelectionDAGTargetInfo TSInfo;
1165 
1166 public:
1167  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
1168  const TargetMachine &TM);
1169 
1170  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
1171 
1172  const R600FrameLowering *getFrameLowering() const override {
1173  return &FrameLowering;
1174  }
1175 
1176  const R600TargetLowering *getTargetLowering() const override {
1177  return &TLInfo;
1178  }
1179 
1180  const R600RegisterInfo *getRegisterInfo() const override {
1181  return &InstrInfo.getRegisterInfo();
1182  }
1183 
1184  const InstrItineraryData *getInstrItineraryData() const override {
1185  return &InstrItins;
1186  }
1187 
1188  // Nothing implemented, just prevent crashes on use.
1190  return &TSInfo;
1191  }
1192 
1193  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1194 
1196  return Gen;
1197  }
1198 
1199  unsigned getStackAlignment() const {
1200  return 4;
1201  }
1202 
1203  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1204  StringRef GPU, StringRef FS);
1205 
1206  bool hasBFE() const {
1207  return (getGeneration() >= EVERGREEN);
1208  }
1209 
1210  bool hasBFI() const {
1211  return (getGeneration() >= EVERGREEN);
1212  }
1213 
1214  bool hasBCNT(unsigned Size) const {
1215  if (Size == 32) // Only Size == 32 can report support, and only from EVERGREEN on.
1216  return (getGeneration() >= EVERGREEN);
1217 
1218  return false; // Every other Size is reported as unsupported.
1219  }
1220 
1221  bool hasBORROW() const {
1222  return (getGeneration() >= EVERGREEN);
1223  }
1224 
1225  bool hasCARRY() const {
1226  return (getGeneration() >= EVERGREEN);
1227  }
1228 
1229  bool hasCaymanISA() const {
1230  return CaymanISA;
1231  }
1232 
1233  bool hasFFBL() const {
1234  return (getGeneration() >= EVERGREEN);
1235  }
1236 
1237  bool hasFFBH() const {
1238  return (getGeneration() >= EVERGREEN);
1239  }
1240 
1241  bool hasFMA() const { return FMA; }
1242 
1243  bool hasCFAluBug() const { return CFALUBug; }
1244 
1245  bool hasVertexCache() const { return HasVertexCache; }
1246 
1247  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1248 
1249  bool enableMachineScheduler() const override {
1250  return true;
1251  }
1252 
1253  bool enableSubRegLiveness() const override {
1254  return true;
1255  }
1256 
1257  /// \returns Maximum number of work groups per compute unit supported by the
1258  /// subtarget and limited by given \p FlatWorkGroupSize.
1259  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1260  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1261  }
1262 
1263  /// \returns Minimum flat work group size supported by the subtarget.
1264  unsigned getMinFlatWorkGroupSize() const override {
1266  }
1267 
1268  /// \returns Maximum flat work group size supported by the subtarget.
1269  unsigned getMaxFlatWorkGroupSize() const override {
1271  }
1272 
1273  /// \returns Maximum number of waves per execution unit supported by the
1274  /// subtarget and limited by given \p FlatWorkGroupSize.
1275  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1276  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1277  }
1278 
1279  /// \returns Minimum number of waves per execution unit supported by the
1280  /// subtarget.
1281  unsigned getMinWavesPerEU() const override {
1282  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1283  }
1284 };
1285 
1286 } // end namespace llvm
1287 
1288 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasVscnt() const
bool hasSDWAOmod() const
bool hasLDSMisalignedBug() const
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool hasVOP3Literal() const
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
bool hasApertureRegs() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
bool hasFlatScrRegister() const
bool isMesaKernel(const Function &F) const
unsigned getMinFlatWorkGroupSize() const override
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:304
bool isPromoteAllocaEnabled() const
bool d16PreservesUnusedBits() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getSGPRAllocGranule() const
bool hasNSAtoVMEMBug() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasTrigReducedRange() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool hasVcmpxPermlaneHazard() const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
virtual unsigned getMinWavesPerEU() const =0
F(f)
InstrItineraryData InstrItins
unsigned getMaxWavesPerEU() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasInstFwdPrefetchBug() const
bool hasFastFMAF32() const
Generation getGeneration() const
bool hasFlatSegmentOffsetBug() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:171
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasFP64Denormals() const
Holds all the information related to register banks.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
bool hasDwordx3LoadStores() const
bool hasIntClamp() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasLdsBranchVmemWARHazard() const
bool hasD16Images() const
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
bool isTrapHandlerEnabled() const
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasSMemRealTime() const
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
const TargetRegisterClass * getBoolRC() const
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasHalfRate64Ops() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
bool useFlatForGlobal() const
unsigned getAddressableNumSGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinWavesPerEU() const override
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
Itinerary data supplied by a subtarget to be used by a target.
bool hasVMEMtoScalarWriteHazard() const
bool hasAddNoCarry() const
const CallLowering * getCallLowering() const override
virtual unsigned getMinFlatWorkGroupSize() const =0
bool hasNoDataDepHazard() const
bool dumpCode() const
bool hasDot6Insts() const
bool isSRAMECCEnabled() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
bool hasDot2Insts() const
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMaxWavesPerCU() const
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasCFAluBug() const
unsigned getStackAlignment() const
bool hasFminFmaxLegacy() const
bool hasDLInsts() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasNSAEncoding() const
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasSwap() const
bool hasMovrel() const
unsigned MaxPrivateElementSize
bool usePRTStrictNull() const
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
bool isCuModeEnabled() const
bool hasLDSFPAtomics() const
bool hasSMEMtoVectorWriteHazard() const
const R600InstrInfo * getInstrInfo() const override
bool hasRegisterBanking() const
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:71
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
bool hasGWSAutoReplay() const
bool hasOnlyRevVALUShifts() const
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:188
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
bool hasScalarAtomics() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasVcmpxExecWARHazard() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
bool hasGWSSemaReleaseAll() const
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool enableSIScheduler() const
bool hasRFEHazards() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
bool hasMadMixInsts() const
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
bool hasScalarFlatScratchInsts() const
unsigned getAlignmentForImplicitArgPtr() const
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasMed3_16() const
int getLDSBankCount() const
bool hasBCNT(unsigned Size) const
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI)
const InstructionSelector * getInstructionSelector() const override
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool hasDot5Insts() const
bool enableSubRegLiveness() const override
bool hasNoSdstCMPX() const
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
R600 DAG Lowering interface definition.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(const Triple &TT)
unsigned getTotalNumVGPRs() const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:46
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getStackAlignment() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool hasSDWAScalar() const
const InstrItineraryData * getInstrItineraryData() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return whether most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
bool hasDot1Insts() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:326
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool hasDPP8() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasR128A16() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override