LLVM  16.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMD GCN specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16 
17 #include "AMDGPUCallLowering.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIFrameLowering.h"
20 #include "SIISelLowering.h"
21 #include "SIInstrInfo.h"
23 
24 #define GET_SUBTARGETINFO_HEADER
25 #include "AMDGPUGenSubtargetInfo.inc"
26 
27 namespace llvm {
28 
29 class GCNTargetMachine;
30 
31 class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
32  public AMDGPUSubtarget {
33 public:
35 
36  // Following 2 enums are documented at:
37  // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
38  enum class TrapHandlerAbi {
39  NONE = 0x00,
40  AMDHSA = 0x01,
41  };
42 
43  enum class TrapID {
44  LLVMAMDHSATrap = 0x02,
45  LLVMAMDHSADebugTrap = 0x03,
46  };
47 
48 private:
49  /// GlobalISel related APIs.
50  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
51  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
52  std::unique_ptr<InstructionSelector> InstSelector;
53  std::unique_ptr<LegalizerInfo> Legalizer;
54  std::unique_ptr<RegisterBankInfo> RegBankInfo;
55 
56 protected:
57  // Basic subtarget description.
60  unsigned Gen = INVALID;
62  int LDSBankCount = 0;
63  unsigned MaxPrivateElementSize = 0;
64 
65  // Possibly statically set by tablegen, but may want to be overridden.
66  bool FastFMAF32 = false;
67  bool FastDenormalF32 = false;
68  bool HalfRate64Ops = false;
69  bool FullRate64Ops = false;
70 
71  // Dynamically set bits that enable features.
72  bool FlatForGlobal = false;
74  bool BackOffBarrier = false;
75  bool UnalignedScratchAccess = false;
76  bool UnalignedAccessMode = false;
77  bool HasApertureRegs = false;
78  bool SupportsXNACK = false;
79 
80  // This should not be used directly. 'TargetID' tracks the dynamic settings
81  // for XNACK.
82  bool EnableXNACK = false;
83 
84  bool EnableTgSplit = false;
85  bool EnableCuMode = false;
86  bool TrapHandler = false;
87 
88  // Used as options.
89  bool EnableLoadStoreOpt = false;
91  bool EnableSIScheduler = false;
92  bool EnableDS128 = false;
93  bool EnablePRTStrictNull = false;
94  bool DumpCode = false;
95 
96  // Subtarget properties statically set by tablegen
97  bool FP64 = false;
98  bool FMA = false;
99  bool MIMG_R128 = false;
100  bool CIInsts = false;
101  bool GFX8Insts = false;
102  bool GFX9Insts = false;
103  bool GFX90AInsts = false;
104  bool GFX940Insts = false;
105  bool GFX10Insts = false;
106  bool GFX11Insts = false;
107  bool GFX10_3Insts = false;
108  bool GFX7GFX8GFX9Insts = false;
109  bool SGPRInitBug = false;
110  bool UserSGPRInit16Bug = false;
113  bool HasSMemRealTime = false;
114  bool HasIntClamp = false;
115  bool HasFmaMixInsts = false;
116  bool HasMovrel = false;
117  bool HasVGPRIndexMode = false;
118  bool HasScalarStores = false;
119  bool HasScalarAtomics = false;
120  bool HasSDWAOmod = false;
121  bool HasSDWAScalar = false;
122  bool HasSDWASdst = false;
123  bool HasSDWAMac = false;
124  bool HasSDWAOutModsVOPC = false;
125  bool HasDPP = false;
126  bool HasDPP8 = false;
127  bool Has64BitDPP = false;
128  bool HasPackedFP32Ops = false;
129  bool HasImageInsts = false;
130  bool HasExtendedImageInsts = false;
131  bool HasR128A16 = false;
132  bool HasGFX10A16 = false;
133  bool HasG16 = false;
134  bool HasNSAEncoding = false;
135  unsigned NSAMaxSize = 0;
136  bool GFX10_AEncoding = false;
137  bool GFX10_BEncoding = false;
138  bool HasDLInsts = false;
139  bool HasDot1Insts = false;
140  bool HasDot2Insts = false;
141  bool HasDot3Insts = false;
142  bool HasDot4Insts = false;
143  bool HasDot5Insts = false;
144  bool HasDot6Insts = false;
145  bool HasDot7Insts = false;
146  bool HasDot8Insts = false;
147  bool HasMAIInsts = false;
148  bool HasFP8Insts = false;
149  bool HasPkFmacF16Inst = false;
150  bool HasAtomicFaddRtnInsts = false;
154  bool SupportsSRAMECC = false;
155 
156  // This should not be used directly. 'TargetID' tracks the dynamic settings
157  // for SRAMECC.
158  bool EnableSRAMECC = false;
159 
160  bool HasNoSdstCMPX = false;
161  bool HasVscnt = false;
162  bool HasGetWaveIdInst = false;
163  bool HasSMemTimeInst = false;
165  bool HasVOP3Literal = false;
166  bool HasNoDataDepHazard = false;
167  bool FlatAddressSpace = false;
168  bool FlatInstOffsets = false;
169  bool FlatGlobalInsts = false;
170  bool FlatScratchInsts = false;
173  bool EnableFlatScratch = false;
174  bool AddNoCarryInsts = false;
175  bool HasUnpackedD16VMem = false;
176  bool LDSMisalignedBug = false;
178  bool UnalignedBufferAccess = false;
179  bool UnalignedDSAccess = false;
180  bool HasPackedTID = false;
181  bool ScalarizeGlobal = false;
182 
186  bool HasInstFwdPrefetchBug = false;
187  bool HasVcmpxExecWARHazard = false;
189  bool HasNSAtoVMEMBug = false;
190  bool HasNSAClauseBug = false;
191  bool HasOffset3fBug = false;
193  bool HasImageStoreD16Bug = false;
194  bool HasImageGather4D16Bug = false;
195  bool HasGFX11FullVGPRs = false;
196  bool HasMADIntraFwdBug = false;
197  bool HasVOPDInsts = false;
198 
199  // Dummy feature to use for assembler in tablegen.
200  bool FeatureDisable = false;
201 
203 private:
204  SIInstrInfo InstrInfo;
205  SITargetLowering TLInfo;
206  SIFrameLowering FrameLowering;
207 
208 public:
209  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
210  const GCNTargetMachine &TM);
211  ~GCNSubtarget() override;
212 
214  StringRef GPU, StringRef FS);
215 
216  const SIInstrInfo *getInstrInfo() const override {
217  return &InstrInfo;
218  }
219 
220  const SIFrameLowering *getFrameLowering() const override {
221  return &FrameLowering;
222  }
223 
224  const SITargetLowering *getTargetLowering() const override {
225  return &TLInfo;
226  }
227 
228  const SIRegisterInfo *getRegisterInfo() const override {
229  return &InstrInfo.getRegisterInfo();
230  }
231 
232  const CallLowering *getCallLowering() const override {
233  return CallLoweringInfo.get();
234  }
235 
236  const InlineAsmLowering *getInlineAsmLowering() const override {
237  return InlineAsmLoweringInfo.get();
238  }
239 
241  return InstSelector.get();
242  }
243 
244  const LegalizerInfo *getLegalizerInfo() const override {
245  return Legalizer.get();
246  }
247 
248  const RegisterBankInfo *getRegBankInfo() const override {
249  return RegBankInfo.get();
250  }
251 
253  return TargetID;
254  }
255 
256  // Nothing implemented, just prevent crashes on use.
257  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
258  return &TSInfo;
259  }
260 
261  const InstrItineraryData *getInstrItineraryData() const override {
262  return &InstrItins;
263  }
264 
266 
268  return (Generation)Gen;
269  }
270 
271  unsigned getMaxWaveScratchSize() const {
272  // See COMPUTE_TMPRING_SIZE.WAVESIZE.
273  if (getGeneration() < GFX11) {
274  // 13-bit field in units of 256-dword.
275  return (256 * 4) * ((1 << 13) - 1);
276  }
277  // 15-bit field in units of 64-dword.
278  return (64 * 4) * ((1 << 15) - 1);
279  }
280 
281  /// Return the number of high bits known to be zero for a frame index.
284  }
285 
286  int getLDSBankCount() const {
287  return LDSBankCount;
288  }
289 
290  unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
291  return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
292  }
293 
294  unsigned getConstantBusLimit(unsigned Opcode) const;
295 
296  /// \returns true if an instruction with opcode \p Opcode that produces a 16-bit
297  /// result in a 32-bit register implicitly zeroes the high 16 bits, rather than
298  /// preserving the original value.
299  bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
300 
301  bool hasIntClamp() const {
302  return HasIntClamp;
303  }
304 
305  bool hasFP64() const {
306  return FP64;
307  }
308 
309  bool hasMIMG_R128() const {
310  return MIMG_R128;
311  }
312 
313  bool hasHWFP64() const {
314  return FP64;
315  }
316 
317  bool hasFastFMAF32() const {
318  return FastFMAF32;
319  }
320 
321  bool hasHalfRate64Ops() const {
322  return HalfRate64Ops;
323  }
324 
325  bool hasFullRate64Ops() const {
326  return FullRate64Ops;
327  }
328 
329  bool hasAddr64() const {
331  }
332 
333  bool hasFlat() const {
335  }
336 
337  // Return true if the target only has the reverse operand versions of VALU
338  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
339  bool hasOnlyRevVALUShifts() const {
340  return getGeneration() >= VOLCANIC_ISLANDS;
341  }
342 
343  bool hasFractBug() const {
344  return getGeneration() == SOUTHERN_ISLANDS;
345  }
346 
347  bool hasBFE() const {
348  return true;
349  }
350 
351  bool hasBFI() const {
352  return true;
353  }
354 
355  bool hasBFM() const {
356  return hasBFE();
357  }
358 
359  bool hasBCNT(unsigned Size) const {
360  return true;
361  }
362 
363  bool hasFFBL() const {
364  return true;
365  }
366 
367  bool hasFFBH() const {
368  return true;
369  }
370 
371  bool hasMed3_16() const {
373  }
374 
375  bool hasMin3Max3_16() const {
377  }
378 
379  bool hasFmaMixInsts() const {
380  return HasFmaMixInsts;
381  }
382 
383  bool hasCARRY() const {
384  return true;
385  }
386 
387  bool hasFMA() const {
388  return FMA;
389  }
390 
391  bool hasSwap() const {
392  return GFX9Insts;
393  }
394 
395  bool hasScalarPackInsts() const {
396  return GFX9Insts;
397  }
398 
399  bool hasScalarMulHiInsts() const {
400  return GFX9Insts;
401  }
402 
405  }
406 
407  bool supportsGetDoorbellID() const {
408  // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
409  return getGeneration() >= GFX9;
410  }
411 
412  /// True if the offset field of DS instructions works as expected. On SI, the
413  /// offset uses a 16-bit adder and does not always wrap properly.
414  bool hasUsableDSOffset() const {
415  return getGeneration() >= SEA_ISLANDS;
416  }
417 
420  }
421 
422  /// Condition output from div_scale is usable.
424  return getGeneration() != SOUTHERN_ISLANDS;
425  }
426 
427  /// Extra wait hazard is needed in some cases before
428  /// s_cbranch_vccnz/s_cbranch_vccz.
429  bool hasReadVCCZBug() const {
430  return getGeneration() <= SEA_ISLANDS;
431  }
432 
433  /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
435  return getGeneration() >= GFX10;
436  }
437 
438  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
439  /// was written by a VALU instruction.
441  return getGeneration() == SOUTHERN_ISLANDS;
442  }
443 
444  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
445  /// SGPR was written by a VALU instruction.
447  return getGeneration() >= VOLCANIC_ISLANDS;
448  }
449 
450  bool hasRFEHazards() const {
451  return getGeneration() >= VOLCANIC_ISLANDS;
452  }
453 
454  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
455  unsigned getSetRegWaitStates() const {
456  return getGeneration() <= SEA_ISLANDS ? 1 : 2;
457  }
458 
459  bool dumpCode() const {
460  return DumpCode;
461  }
462 
463  /// Return the amount of LDS that can be used that will not restrict the
464  /// occupancy lower than WaveCount.
465  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
466  const Function &) const;
467 
470  }
471 
472  /// \returns If target supports S_DENORM_MODE.
473  bool hasDenormModeInst() const {
475  }
476 
477  bool useFlatForGlobal() const {
478  return FlatForGlobal;
479  }
480 
481  /// \returns If target supports ds_read/write_b128 and user enables generation
482  /// of ds_read/write_b128.
483  bool useDS128() const {
484  return CIInsts && EnableDS128;
485  }
486 
487  /// \return If target supports ds_read/write_b96/128.
488  bool hasDS96AndDS128() const {
489  return CIInsts;
490  }
491 
492  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
493  bool haveRoundOpsF64() const {
494  return CIInsts;
495  }
496 
497  /// \returns If MUBUF instructions always perform range checking, even for
498  /// buffer resources used for private memory access.
501  }
502 
503  /// \returns If target requires PRT Strict NULL support (zero result registers
504  /// for sparse texture support).
505  bool usePRTStrictNull() const {
506  return EnablePRTStrictNull;
507  }
508 
511  }
512 
513  /// \returns true if the target supports backing off of s_barrier instructions
514  /// when an exception is raised.
515  bool supportsBackOffBarrier() const {
516  return BackOffBarrier;
517  }
518 
520  return UnalignedBufferAccess;
521  }
522 
525  }
526 
527  bool hasUnalignedDSAccess() const {
528  return UnalignedDSAccess;
529  }
530 
533  }
534 
536  return UnalignedScratchAccess;
537  }
538 
539  bool hasUnalignedAccessMode() const {
540  return UnalignedAccessMode;
541  }
542 
543  bool hasApertureRegs() const {
544  return HasApertureRegs;
545  }
546 
547  bool isTrapHandlerEnabled() const {
548  return TrapHandler;
549  }
550 
551  bool isXNACKEnabled() const {
552  return TargetID.isXnackOnOrAny();
553  }
554 
555  bool isTgSplitEnabled() const {
556  return EnableTgSplit;
557  }
558 
559  bool isCuModeEnabled() const {
560  return EnableCuMode;
561  }
562 
563  bool hasFlatAddressSpace() const {
564  return FlatAddressSpace;
565  }
566 
567  bool hasFlatScrRegister() const {
568  return hasFlatAddressSpace();
569  }
570 
571  bool hasFlatInstOffsets() const {
572  return FlatInstOffsets;
573  }
574 
575  bool hasFlatGlobalInsts() const {
576  return FlatGlobalInsts;
577  }
578 
579  bool hasFlatScratchInsts() const {
580  return FlatScratchInsts;
581  }
582 
583  // Check if target supports ST addressing mode with FLAT scratch instructions.
584  // The ST addressing mode means no registers are used, either VGPR or SGPR,
585  // but only immediate offset is swizzled and added to the FLAT scratch base.
586  bool hasFlatScratchSTMode() const {
588  }
589 
590  bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
591 
593  return ScalarFlatScratchInsts;
594  }
595 
596  bool enableFlatScratch() const {
597  return flatScratchIsArchitected() ||
599  }
600 
601  bool hasGlobalAddTidInsts() const {
602  return GFX10_BEncoding;
603  }
604 
605  bool hasAtomicCSub() const {
606  return GFX10_BEncoding;
607  }
608 
610  return getGeneration() >= GFX9;
611  }
612 
613  bool hasFlatSegmentOffsetBug() const {
615  }
616 
618  return getGeneration() > GFX9;
619  }
620 
621  bool hasD16LoadStore() const {
622  return getGeneration() >= GFX9;
623  }
624 
625  bool d16PreservesUnusedBits() const {
627  }
628 
629  bool hasD16Images() const {
630  return getGeneration() >= VOLCANIC_ISLANDS;
631  }
632 
633  /// \returns true if most LDS instructions have an m0 use that requires m0 to be
634  /// initialized.
635  bool ldsRequiresM0Init() const {
636  return getGeneration() < GFX9;
637  }
638 
639  // True if the hardware rewinds and replays GWS operations if a wave is
640  // preempted.
641  //
642  // If this is false, a GWS operation requires testing if a nack set the
643  // MEM_VIOL bit, and repeating if so.
644  bool hasGWSAutoReplay() const {
645  return getGeneration() >= GFX9;
646  }
647 
648  /// \returns if target has ds_gws_sema_release_all instruction.
649  bool hasGWSSemaReleaseAll() const {
650  return CIInsts;
651  }
652 
653  /// \returns true if the target has integer add/sub instructions that do not
654  /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
655  /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
656  /// for saturation.
657  bool hasAddNoCarry() const {
658  return AddNoCarryInsts;
659  }
660 
661  bool hasUnpackedD16VMem() const {
662  return HasUnpackedD16VMem;
663  }
664 
665  // Covers VS/PS/CS graphics shaders
666  bool isMesaGfxShader(const Function &F) const {
667  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
668  }
669 
670  bool hasMad64_32() const {
671  return getGeneration() >= SEA_ISLANDS;
672  }
673 
674  bool hasSDWAOmod() const {
675  return HasSDWAOmod;
676  }
677 
678  bool hasSDWAScalar() const {
679  return HasSDWAScalar;
680  }
681 
682  bool hasSDWASdst() const {
683  return HasSDWASdst;
684  }
685 
686  bool hasSDWAMac() const {
687  return HasSDWAMac;
688  }
689 
690  bool hasSDWAOutModsVOPC() const {
691  return HasSDWAOutModsVOPC;
692  }
693 
694  bool hasDLInsts() const {
695  return HasDLInsts;
696  }
697 
698  bool hasDot1Insts() const {
699  return HasDot1Insts;
700  }
701 
702  bool hasDot2Insts() const {
703  return HasDot2Insts;
704  }
705 
706  bool hasDot3Insts() const {
707  return HasDot3Insts;
708  }
709 
710  bool hasDot4Insts() const {
711  return HasDot4Insts;
712  }
713 
714  bool hasDot5Insts() const {
715  return HasDot5Insts;
716  }
717 
718  bool hasDot6Insts() const {
719  return HasDot6Insts;
720  }
721 
722  bool hasDot7Insts() const {
723  return HasDot7Insts;
724  }
725 
726  bool hasDot8Insts() const {
727  return HasDot8Insts;
728  }
729 
730  bool hasMAIInsts() const {
731  return HasMAIInsts;
732  }
733 
734  bool hasFP8Insts() const {
735  return HasFP8Insts;
736  }
737 
738  bool hasPkFmacF16Inst() const {
739  return HasPkFmacF16Inst;
740  }
741 
742  bool hasAtomicFaddInsts() const {
744  }
745 
747 
749 
751 
753 
754  bool hasNoSdstCMPX() const {
755  return HasNoSdstCMPX;
756  }
757 
758  bool hasVscnt() const {
759  return HasVscnt;
760  }
761 
762  bool hasGetWaveIdInst() const {
763  return HasGetWaveIdInst;
764  }
765 
766  bool hasSMemTimeInst() const {
767  return HasSMemTimeInst;
768  }
769 
770  bool hasShaderCyclesRegister() const {
772  }
773 
774  bool hasVOP3Literal() const {
775  return HasVOP3Literal;
776  }
777 
778  bool hasNoDataDepHazard() const {
779  return HasNoDataDepHazard;
780  }
781 
783  return getGeneration() < SEA_ISLANDS;
784  }
785 
786  bool hasInstPrefetch() const { return getGeneration() >= GFX10; }
787 
788  // Scratch is allocated in 256 dword per wave blocks for the entire
789  // wavefront. When viewed from the perspective of an arbitrary workitem, this
790  // is 4-byte aligned.
791  //
792  // Only 4-byte alignment is really needed to access anything. Transformations
793  // on the pointer value itself may rely on the alignment / known low bits of
794  // the pointer. Set this to something above the minimum to avoid needing
795  // dynamic realignment in common cases.
796  Align getStackAlignment() const { return Align(16); }
797 
798  bool enableMachineScheduler() const override {
799  return true;
800  }
801 
802  bool useAA() const override;
803 
804  bool enableSubRegLiveness() const override {
805  return true;
806  }
807 
810 
811  // static wrappers
812  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
813 
814  // XXX - Why is this here if it isn't in the default pass set?
815  bool enableEarlyIfConversion() const override {
816  return true;
817  }
818 
820  unsigned NumRegionInstrs) const override;
821 
822  unsigned getMaxNumUserSGPRs() const {
823  return 16;
824  }
825 
826  bool hasSMemRealTime() const {
827  return HasSMemRealTime;
828  }
829 
830  bool hasMovrel() const {
831  return HasMovrel;
832  }
833 
834  bool hasVGPRIndexMode() const {
835  return HasVGPRIndexMode;
836  }
837 
838  bool useVGPRIndexMode() const;
839 
840  bool hasScalarCompareEq64() const {
841  return getGeneration() >= VOLCANIC_ISLANDS;
842  }
843 
844  bool hasScalarStores() const {
845  return HasScalarStores;
846  }
847 
848  bool hasScalarAtomics() const {
849  return HasScalarAtomics;
850  }
851 
852  bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
853 
854  /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
855  bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
856 
857  /// \returns true if the subtarget has the v_permlane64_b32 instruction.
858  bool hasPermLane64() const { return getGeneration() >= GFX11; }
859 
860  bool hasDPP() const {
861  return HasDPP;
862  }
863 
864  bool hasDPPBroadcasts() const {
865  return HasDPP && getGeneration() < GFX10;
866  }
867 
868  bool hasDPPWavefrontShifts() const {
869  return HasDPP && getGeneration() < GFX10;
870  }
871 
872  bool hasDPP8() const {
873  return HasDPP8;
874  }
875 
876  bool has64BitDPP() const {
877  return Has64BitDPP;
878  }
879 
880  bool hasPackedFP32Ops() const {
881  return HasPackedFP32Ops;
882  }
883 
884  bool hasFmaakFmamkF32Insts() const {
885  return getGeneration() >= GFX10 || hasGFX940Insts();
886  }
887 
888  bool hasImageInsts() const {
889  return HasImageInsts;
890  }
891 
892  bool hasExtendedImageInsts() const {
893  return HasExtendedImageInsts;
894  }
895 
896  bool hasR128A16() const {
897  return HasR128A16;
898  }
899 
900  bool hasGFX10A16() const {
901  return HasGFX10A16;
902  }
903 
904  bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
905 
906  bool hasG16() const { return HasG16; }
907 
908  bool hasOffset3fBug() const {
909  return HasOffset3fBug;
910  }
911 
912  bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
913 
915 
916  bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
917 
918  bool hasNSAEncoding() const { return HasNSAEncoding; }
919 
920  unsigned getNSAMaxSize() const { return NSAMaxSize; }
921 
922  bool hasGFX10_AEncoding() const {
923  return GFX10_AEncoding;
924  }
925 
926  bool hasGFX10_BEncoding() const {
927  return GFX10_BEncoding;
928  }
929 
930  bool hasGFX10_3Insts() const {
931  return GFX10_3Insts;
932  }
933 
934  bool hasMadF16() const;
935 
936  bool hasMovB64() const { return GFX940Insts; }
937 
938  bool hasLshlAddB64() const { return GFX940Insts; }
939 
940  bool enableSIScheduler() const {
941  return EnableSIScheduler;
942  }
943 
944  bool loadStoreOptEnabled() const {
945  return EnableLoadStoreOpt;
946  }
947 
948  bool hasSGPRInitBug() const {
949  return SGPRInitBug;
950  }
951 
952  bool hasUserSGPRInit16Bug() const {
953  return UserSGPRInit16Bug && isWave32();
954  }
955 
957 
960  }
961 
962  bool hasMFMAInlineLiteralBug() const {
964  }
965 
966  bool has12DWordStoreHazard() const {
968  }
969 
970  // \returns true if the subtarget supports DWORDX3 load/store instructions.
971  bool hasDwordx3LoadStores() const {
972  return CIInsts;
973  }
974 
977  }
978 
979  bool hasReadM0SendMsgHazard() const {
982  }
983 
984  bool hasReadM0LdsDmaHazard() const {
986  }
987 
990  }
991 
992  bool hasVcmpxPermlaneHazard() const {
993  return HasVcmpxPermlaneHazard;
994  }
995 
998  }
999 
1002  }
1003 
1004  bool hasLDSMisalignedBug() const {
1005  return LDSMisalignedBug && !EnableCuMode;
1006  }
1007 
1008  bool hasInstFwdPrefetchBug() const {
1009  return HasInstFwdPrefetchBug;
1010  }
1011 
1012  bool hasVcmpxExecWARHazard() const {
1013  return HasVcmpxExecWARHazard;
1014  }
1015 
1018  }
1019 
1020  // Shift amount of a 64 bit shift cannot be a highest allocated register
1021  // if also at the end of the allocation block.
1022  bool hasShift64HighRegBug() const {
1023  return GFX90AInsts && !GFX940Insts;
1024  }
1025 
1026  // Has one cycle hazard on transcendental instruction feeding a
1027  // non transcendental VALU.
1028  bool hasTransForwardingHazard() const { return GFX940Insts; }
1029 
1030  // Has one cycle hazard on a VALU instruction partially writing dst with
1031  // a shift of result bits feeding another VALU instruction.
1032  bool hasDstSelForwardingHazard() const { return GFX940Insts; }
1033 
1034  // Cannot use op_sel with v_dot instructions.
1035  bool hasDOTOpSelHazard() const { return GFX940Insts; }
1036 
1037  // Does not have HW interlocks for VALU writing and then reading SGPRs.
1038  bool hasVDecCoExecHazard() const {
1039  return GFX940Insts;
1040  }
1041 
1042  bool hasNSAtoVMEMBug() const {
1043  return HasNSAtoVMEMBug;
1044  }
1045 
1046  bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1047 
1048  bool hasHardClauses() const { return getGeneration() >= GFX10; }
1049 
1050  bool hasGFX90AInsts() const { return GFX90AInsts; }
1051 
1053  return getGeneration() == GFX10;
1054  }
1055 
1056  bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1057 
1058  bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1059 
1061  return getGeneration() >= GFX11;
1062  }
1063 
1064  bool hasVALUTransUseHazard() const { return getGeneration() >= GFX11; }
1065 
1066  bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; }
1067 
1068  /// Return if operations acting on VGPR tuples require even alignment.
1069  bool needsAlignedVGPRs() const { return GFX90AInsts; }
1070 
1071  /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1072  bool hasSPackHL() const { return GFX11Insts; }
1073 
1074  /// Return true if the target's EXP instruction has the COMPR flag, which
1075  /// affects the meaning of the EN (enable) bits.
1076  bool hasCompressedExport() const { return !GFX11Insts; }
1077 
1078  /// Return true if the target's EXP instruction supports the NULL export
1079  /// target.
1080  bool hasNullExportTarget() const { return !GFX11Insts; }
1081 
1082  bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; }
1083 
1084  bool hasVOPDInsts() const { return HasVOPDInsts; }
1085 
1086  bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; }
1087 
1088  /// Return true if the target has the S_DELAY_ALU instruction.
1089  bool hasDelayAlu() const { return GFX11Insts; }
1090 
1091  bool hasPackedTID() const { return HasPackedTID; }
1092 
1093  // GFX940 is a derivative of GFX90A. hasGFX940Insts() being true implies that
1094  // hasGFX90AInsts() is also true.
1095  bool hasGFX940Insts() const { return GFX940Insts; }
1096 
1097  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1098  /// SGPRs
1099  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1100 
1101  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1102  /// VGPRs
1103  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1104 
1105  /// Return occupancy for the given function, taking into account the LDS size
1106  /// and the numbers of SGPRs and VGPRs if provided.
1107  /// Note, occupancy can be affected by the scratch allocation as well, but
1108  /// we do not have enough information to compute it.
1109  unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1110  unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1111 
1112  /// \returns true if the flat_scratch register should be initialized with the
1113  /// pointer to the wave's scratch memory rather than a size and offset.
1114  bool flatScratchIsPointer() const {
1116  }
1117 
1118  /// \returns true if the flat_scratch register is initialized by the HW.
1119  /// In this case it is readonly.
1121 
1122  /// \returns true if the machine has merged shaders in which s0-s7 are
1123  /// reserved by the hardware and user SGPRs start at s8
1124  bool hasMergedShaders() const {
1125  return getGeneration() >= GFX9;
1126  }
1127 
1128  // \returns true if the target supports the pre-NGG legacy geometry path.
1129  bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1130 
1131  /// \returns SGPR allocation granularity supported by the subtarget.
1132  unsigned getSGPRAllocGranule() const {
1134  }
1135 
1136  /// \returns SGPR encoding granularity supported by the subtarget.
1137  unsigned getSGPREncodingGranule() const {
1139  }
1140 
1141  /// \returns Total number of SGPRs supported by the subtarget.
1142  unsigned getTotalNumSGPRs() const {
1143  return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1144  }
1145 
1146  /// \returns Addressable number of SGPRs supported by the subtarget.
1147  unsigned getAddressableNumSGPRs() const {
1149  }
1150 
1151  /// \returns Minimum number of SGPRs that meets the given number of waves per
1152  /// execution unit requirement supported by the subtarget.
1153  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1154  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1155  }
1156 
1157  /// \returns Maximum number of SGPRs that meets the given number of waves per
1158  /// execution unit requirement supported by the subtarget.
1159  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1160  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1161  }
1162 
1163  /// \returns Reserved number of SGPRs. This is the common
1164  /// utility function called by the MachineFunction and
1165  /// Function variants of getReservedNumSGPRs.
1166  unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1167  /// \returns Reserved number of SGPRs for given machine function \p MF.
1168  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1169 
1170  /// \returns Reserved number of SGPRs for given function \p F.
1171  unsigned getReservedNumSGPRs(const Function &F) const;
1172 
1173  /// \returns max num SGPRs. This is the common utility
1174  /// function called by MachineFunction and Function
1175  /// variants of getMaxNumSGPRs.
1176  unsigned getBaseMaxNumSGPRs(const Function &F,
1177  std::pair<unsigned, unsigned> WavesPerEU,
1178  unsigned PreloadedSGPRs,
1179  unsigned ReservedNumSGPRs) const;
1180 
1181  /// \returns Maximum number of SGPRs that meets number of waves per execution
1182  /// unit requirement for function \p MF, or number of SGPRs explicitly
1183  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1184  ///
1185  /// \returns Value that meets number of waves per execution unit requirement
1186  /// if explicitly requested value cannot be converted to integer, violates
1187  /// subtarget's specifications, or does not meet number of waves per execution
1188  /// unit requirement.
1189  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1190 
1191  /// \returns Maximum number of SGPRs that meets number of waves per execution
1192  /// unit requirement for function \p F, or number of SGPRs explicitly
1193  /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1194  ///
1195  /// \returns Value that meets number of waves per execution unit requirement
1196  /// if explicitly requested value cannot be converted to integer, violates
1197  /// subtarget's specifications, or does not meet number of waves per execution
1198  /// unit requirement.
1199  unsigned getMaxNumSGPRs(const Function &F) const;
1200 
1201  /// \returns VGPR allocation granularity supported by the subtarget.
1202  unsigned getVGPRAllocGranule() const {
1204  }
1205 
1206  /// \returns VGPR encoding granularity supported by the subtarget.
1207  unsigned getVGPREncodingGranule() const {
1209  }
1210 
1211  /// \returns Total number of VGPRs supported by the subtarget.
1212  unsigned getTotalNumVGPRs() const {
1213  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1214  }
1215 
1216  /// \returns Addressable number of VGPRs supported by the subtarget.
1217  unsigned getAddressableNumVGPRs() const {
1219  }
1220 
1221  /// \returns Minimum number of VGPRs that meets given number of waves per
1222  /// execution unit requirement supported by the subtarget.
1223  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1224  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1225  }
1226 
1227  /// \returns Maximum number of VGPRs that meets given number of waves per
1228  /// execution unit requirement supported by the subtarget.
1229  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1230  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1231  }
1232 
1233  /// \returns max num VGPRs. This is the common utility function
1234  /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1235  unsigned getBaseMaxNumVGPRs(const Function &F,
1236  std::pair<unsigned, unsigned> WavesPerEU) const;
1237  /// \returns Maximum number of VGPRs that meets number of waves per execution
1238  /// unit requirement for function \p F, or number of VGPRs explicitly
1239  /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1240  ///
1241  /// \returns Value that meets number of waves per execution unit requirement
1242  /// if explicitly requested value cannot be converted to integer, violates
1243  /// subtarget's specifications, or does not meet number of waves per execution
1244  /// unit requirement.
1245  unsigned getMaxNumVGPRs(const Function &F) const;
1246 
1247  unsigned getMaxNumAGPRs(const Function &F) const {
1248  return getMaxNumVGPRs(F);
1249  }
1250 
1251  /// \returns Maximum number of VGPRs that meets number of waves per execution
1252  /// unit requirement for function \p MF, or number of VGPRs explicitly
1253  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1254  ///
1255  /// \returns Value that meets number of waves per execution unit requirement
1256  /// if explicitly requested value cannot be converted to integer, violates
1257  /// subtarget's specifications, or does not meet number of waves per execution
1258  /// unit requirement.
1259  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1260 
1261  void getPostRAMutations(
1262  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1263  const override;
1264 
1265  std::unique_ptr<ScheduleDAGMutation>
1267 
1268  bool isWave32() const {
1269  return getWavefrontSize() == 32;
1270  }
1271 
1272  bool isWave64() const {
1273  return getWavefrontSize() == 64;
1274  }
1275 
1277  return getRegisterInfo()->getBoolRC();
1278  }
1279 
1280  /// \returns Maximum number of work groups per compute unit supported by the
1281  /// subtarget and limited by given \p FlatWorkGroupSize.
1282  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1283  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1284  }
1285 
1286  /// \returns Minimum flat work group size supported by the subtarget.
1287  unsigned getMinFlatWorkGroupSize() const override {
1289  }
1290 
1291  /// \returns Maximum flat work group size supported by the subtarget.
1292  unsigned getMaxFlatWorkGroupSize() const override {
1294  }
1295 
1296  /// \returns Number of waves per execution unit required to support the given
1297  /// \p FlatWorkGroupSize.
1298  unsigned
1299  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1300  return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1301  }
1302 
1303  /// \returns Minimum number of waves per execution unit supported by the
1304  /// subtarget.
1305  unsigned getMinWavesPerEU() const override {
1306  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1307  }
1308 
1309  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1310  SDep &Dep) const override;
1311 
1312  // \returns true if it's beneficial on this subtarget for the scheduler to
1313  // cluster stores as well as loads.
1314  bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1315 
1316  // \returns the number of address arguments from which to enable MIMG NSA
1317  // on supported architectures.
1318  unsigned getNSAThreshold(const MachineFunction &MF) const;
1319 };
1320 
1321 } // end namespace llvm
1322 
1323 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
llvm::GCNSubtarget::shouldClusterStores
bool shouldClusterStores() const
Definition: GCNSubtarget.h:1314
llvm::GCNSubtarget::hasScalarMulHiInsts
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:399
llvm::GCNSubtarget::HasDot3Insts
bool HasDot3Insts
Definition: GCNSubtarget.h:141
llvm::GCNSubtarget::hasReadM0LdsDmaHazard
bool hasReadM0LdsDmaHazard() const
Definition: GCNSubtarget.h:984
llvm::GCNSubtarget::hasVDecCoExecHazard
bool hasVDecCoExecHazard() const
Definition: GCNSubtarget.h:1038
llvm::GCNSubtarget::HasImageInsts
bool HasImageInsts
Definition: GCNSubtarget.h:129
llvm::GCNSubtarget::Gen
unsigned Gen
Definition: GCNSubtarget.h:60
llvm::GCNSubtarget::hasGFX10A16
bool hasGFX10A16() const
Definition: GCNSubtarget.h:900
llvm::GCNSubtarget::hasBFM
bool hasBFM() const
Definition: GCNSubtarget.h:355
llvm::GCNSubtarget::hasPermLane64
bool hasPermLane64() const
Definition: GCNSubtarget.h:858
llvm::GCNSubtarget::hasDot2Insts
bool hasDot2Insts() const
Definition: GCNSubtarget.h:702
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::AMDGPU::IsaInfo::getSGPRAllocGranule
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:871
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::GCNSubtarget::GFX8Insts
bool GFX8Insts
Definition: GCNSubtarget.h:101
llvm::GCNSubtarget::hasGWSAutoReplay
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:644
llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:617
llvm::GCNSubtarget::HasSDWAScalar
bool HasSDWAScalar
Definition: GCNSubtarget.h:121
llvm::GCNSubtarget::TrapHandlerAbi
TrapHandlerAbi
Definition: GCNSubtarget.h:38
llvm::GCNSubtarget::HasGetWaveIdInst
bool HasGetWaveIdInst
Definition: GCNSubtarget.h:162
llvm::GCNSubtarget::getRegBankInfo
const RegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:248
llvm::GCNSubtarget::hasSDWAMac
bool hasSDWAMac() const
Definition: GCNSubtarget.h:686
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumSGPRs
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
Definition: AMDGPUMetadata.h:258
llvm::GCNSubtarget::hasDstSelForwardingHazard
bool hasDstSelForwardingHazard() const
Definition: GCNSubtarget.h:1032
llvm::GCNSubtarget::hasVGPRIndexMode
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:834
llvm::InlineAsmLowering
Definition: InlineAsmLowering.h:28
llvm::GCNSubtarget::hasSDWASdst
bool hasSDWASdst() const
Definition: GCNSubtarget.h:682
llvm::GCNSubtarget::getFrameLowering
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:220
llvm::GCNSubtarget::hasFP8Insts
bool hasFP8Insts() const
Definition: GCNSubtarget.h:734
llvm::GCNSubtarget::initializeSubtargetDependencies
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Definition: AMDGPUSubtarget.cpp:64
llvm::GCNSubtarget::hasD16Images
bool hasD16Images() const
Definition: GCNSubtarget.h:629
llvm::GCNSubtarget::GFX11Insts
bool GFX11Insts
Definition: GCNSubtarget.h:106
llvm::GCNSubtarget::EnablePRTStrictNull
bool EnablePRTStrictNull
Definition: GCNSubtarget.h:93
llvm::Function
Definition: Function.h:60
llvm::GCNSubtarget::HasDot2Insts
bool HasDot2Insts
Definition: GCNSubtarget.h:140
llvm::GCNSubtarget::hasImageInsts
bool hasImageInsts() const
Definition: GCNSubtarget.h:888
llvm::GCNSubtarget::HasPackedFP32Ops
bool HasPackedFP32Ops
Definition: GCNSubtarget.h:128
llvm::GCNSubtarget::hasVALUPartialForwardingHazard
bool hasVALUPartialForwardingHazard() const
Definition: GCNSubtarget.h:1060
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isSramEccOnOrAny
bool isSramEccOnOrAny() const
Definition: AMDGPUBaseInfo.h:155
llvm::GCNSubtarget::FlatGlobalInsts
bool FlatGlobalInsts
Definition: GCNSubtarget.h:169
llvm::GCNSubtarget::FlatAddressSpace
bool FlatAddressSpace
Definition: GCNSubtarget.h:167
llvm::GCNSubtarget::HasDLInsts
bool HasDLInsts
Definition: GCNSubtarget.h:138
llvm::GCNSubtarget::hasNSAClauseBug
bool hasNSAClauseBug() const
Definition: GCNSubtarget.h:1046
llvm::GCNSubtarget::getNSAMaxSize
unsigned getNSAMaxSize() const
Definition: GCNSubtarget.h:920
llvm::GCNSubtarget::hasMovrel
bool hasMovrel() const
Definition: GCNSubtarget.h:830
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:37
llvm::GCNSubtarget::hasMovB64
bool hasMovB64() const
Definition: GCNSubtarget.h:936
llvm::GCNSubtarget::UserSGPRInit16Bug
bool UserSGPRInit16Bug
Definition: GCNSubtarget.h:110
llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:446
llvm::GCNSubtarget::TrapHandlerAbi::NONE
@ NONE
llvm::GCNSubtarget::hasPermLaneX16
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:855
llvm::GCNSubtarget::hasShaderCyclesRegister
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:770
llvm::GCNSubtarget::needsAlignedVGPRs
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
Definition: GCNSubtarget.h:1069
llvm::GCNSubtarget::hasFlatScratchInsts
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:579
llvm::GCNSubtarget::UnalignedDSAccess
bool UnalignedDSAccess
Definition: GCNSubtarget.h:179
llvm::GCNSubtarget::hasFP64
bool hasFP64() const
Definition: GCNSubtarget.h:305
llvm::GCNSubtarget::InstrItins
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:61
llvm::GCNSubtarget::HasImageStoreD16Bug
bool HasImageStoreD16Bug
Definition: GCNSubtarget.h:193
llvm::GCNSubtarget::hasAutoWaitcntBeforeBarrier
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:509
llvm::GCNSubtarget::supportsMinMaxDenormModes
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:468
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:613
llvm::GCNSubtarget::HasDot4Insts
bool HasDot4Insts
Definition: GCNSubtarget.h:142
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::GCNSubtarget::hasDS96AndDS128
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:488
llvm::GCNSubtarget::HasVcmpxPermlaneHazard
bool HasVcmpxPermlaneHazard
Definition: GCNSubtarget.h:183
llvm::GCNSubtarget::hasSPackHL
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
Definition: GCNSubtarget.h:1072
llvm::GCNSubtarget::getSetRegWaitStates
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:455
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::GCNSubtarget::hasVOPDInsts
bool hasVOPDInsts() const
Definition: GCNSubtarget.h:1084
llvm::GCNSubtarget::HasExtendedImageInsts
bool HasExtendedImageInsts
Definition: GCNSubtarget.h:130
llvm::GCNSubtarget::hasFlatGlobalInsts
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:575
llvm::GCNSubtarget::hasCARRY
bool hasCARRY() const
Definition: GCNSubtarget.h:383
llvm::GCNSubtarget::hasShift64HighRegBug
bool hasShift64HighRegBug() const
Definition: GCNSubtarget.h:1022
llvm::GCNSubtarget::useDS128
bool useDS128() const
Definition: GCNSubtarget.h:483
llvm::GCNSubtarget::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1282
llvm::GCNSubtarget::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1292
llvm::GCNSubtarget::hasGFX11FullVGPRs
bool hasGFX11FullVGPRs() const
Definition: GCNSubtarget.h:1082
llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:547
llvm::GCNSubtarget::hasDot3Insts
bool hasDot3Insts() const
Definition: GCNSubtarget.h:706
llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:1004
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:694
llvm::GCNSubtarget::hasFractBug
bool hasFractBug() const
Definition: GCNSubtarget.h:343
llvm::GCNSubtarget::hasDwordx3LoadStores
bool hasDwordx3LoadStores() const
Definition: GCNSubtarget.h:971
llvm::GCNSubtarget::hasNSAEncoding
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:918
llvm::GCNSubtarget::TrapID::LLVMAMDHSADebugTrap
@ LLVMAMDHSADebugTrap
llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:255
llvm::GCNSubtarget::HasSDWAOutModsVOPC
bool HasSDWAOutModsVOPC
Definition: GCNSubtarget.h:124
llvm::AMDGPU::IsaInfo::getMinWavesPerEU
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:838
llvm::GCNSubtarget::getStackAlignment
Align getStackAlignment() const
Definition: GCNSubtarget.h:796
llvm::GCNSubtarget::hasUnalignedDSAccessEnabled
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:531
llvm::GCNSubtarget::EnableSIScheduler
bool EnableSIScheduler
Definition: GCNSubtarget.h:91
llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:434
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::GCNSubtarget::HasFlatSegmentOffsetBug
bool HasFlatSegmentOffsetBug
Definition: GCNSubtarget.h:192
llvm::GCNSubtarget::loadStoreOptEnabled
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:944
llvm::GCNSubtarget::TargetID
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:59
llvm::GCNSubtarget::HasDot8Insts
bool HasDot8Insts
Definition: GCNSubtarget.h:146
llvm::GCNSubtarget::UnalignedBufferAccess
bool UnalignedBufferAccess
Definition: GCNSubtarget.h:178
llvm::GCNSubtarget::hasGetWaveIdInst
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:762
llvm::GCNSubtarget::HasNSAtoVMEMBug
bool HasNSAtoVMEMBug
Definition: GCNSubtarget.h:189
llvm::GCNSubtarget::HasScalarAtomics
bool HasScalarAtomics
Definition: GCNSubtarget.h:119
llvm::GCNSubtarget::ScalarFlatScratchInsts
bool ScalarFlatScratchInsts
Definition: GCNSubtarget.h:171
llvm::AMDGPU::IsaInfo::getMaxNumVGPRs
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:1032
llvm::GCNSubtarget::hasPackedFP32Ops
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:880
llvm::GCNSubtarget::HalfRate64Ops
bool HalfRate64Ops
Definition: GCNSubtarget.h:68
llvm::GCNSubtarget::NegativeScratchOffsetBug
bool NegativeScratchOffsetBug
Definition: GCNSubtarget.h:111
llvm::GCNSubtarget::HasNSAClauseBug
bool HasNSAClauseBug
Definition: GCNSubtarget.h:190
llvm::GCNSubtarget::hasScalarCompareEq64
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:840
UseOpIdx
unsigned UseOpIdx
Definition: RISCVInsertVSETVLI.cpp:600
llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:826
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:228
llvm::GCNSubtarget::hasCompressedExport
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
Definition: GCNSubtarget.h:1076
llvm::GCNSubtarget::hasLDSFPAtomicAdd
bool hasLDSFPAtomicAdd() const
Definition: GCNSubtarget.h:852
llvm::AMDGPUSubtarget::GFX11
@ GFX11
Definition: AMDGPUSubtarget.h:42
llvm::GCNSubtarget::HasOffset3fBug
bool HasOffset3fBug
Definition: GCNSubtarget.h:191
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GCNSubtarget::UnalignedAccessMode
bool UnalignedAccessMode
Definition: GCNSubtarget.h:76
llvm::GCNSubtarget::hasLdsDirect
bool hasLdsDirect() const
Definition: GCNSubtarget.h:1058
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:216
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:418
llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:523
llvm::GCNSubtarget::hasHalfRate64Ops
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:321
llvm::GCNSubtarget::hasGFX10_AEncoding
bool hasGFX10_AEncoding() const
Definition: GCNSubtarget.h:922
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:649
llvm::GCNSubtarget::getMaxNumAGPRs
unsigned getMaxNumAGPRs(const Function &F) const
Definition: GCNSubtarget.h:1247
llvm::GCNSubtarget::HasSDWASdst
bool HasSDWASdst
Definition: GCNSubtarget.h:122
llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:880
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:120
llvm::GCNSubtarget::HasNoDataDepHazard
bool HasNoDataDepHazard
Definition: GCNSubtarget.h:166
llvm::GCNSubtarget::HasVGPRIndexMode
bool HasVGPRIndexMode
Definition: GCNSubtarget.h:117
llvm::GCNSubtarget::overrideSchedPolicy
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
Definition: AMDGPUSubtarget.cpp:575
llvm::GCNSubtarget::BackOffBarrier
bool BackOffBarrier
Definition: GCNSubtarget.h:74
InstrInfo
return InstrInfo
Definition: RISCVInsertVSETVLI.cpp:668
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:110
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:98
llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:282
llvm::GCNSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
llvm::GCNSubtarget::getMaxWaveScratchSize
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:271
llvm::GCNSubtarget::hasFlatScrRegister
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:567
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1808
llvm::GCNSubtarget::hasFastFMAF32
bool hasFastFMAF32() const
Definition: GCNSubtarget.h:317
llvm::GCNSubtarget::hasA16
bool hasA16() const
Definition: GCNSubtarget.h:904
llvm::GCNSubtarget::GFX10_3Insts
bool GFX10_3Insts
Definition: GCNSubtarget.h:107
llvm::GCNSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:798
llvm::GCNSubtarget::hasBFI
bool hasBFI() const
Definition: GCNSubtarget.h:351
llvm::GCNSubtarget::GFX940Insts
bool GFX940Insts
Definition: GCNSubtarget.h:104
llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition: GCNSubtarget.h:1159
llvm::GCNSubtarget::useVGPRIndexMode
bool useVGPRIndexMode() const
Definition: AMDGPUSubtarget.cpp:596
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:329
llvm::GCNSubtarget::EnableLoadStoreOpt
bool EnableLoadStoreOpt
Definition: GCNSubtarget.h:89
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:477
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:379
llvm::GCNSubtarget::HasSMemRealTime
bool HasSMemRealTime
Definition: GCNSubtarget.h:113
llvm::Legalizer
Definition: Legalizer.h:36
llvm::GCNSubtarget::hasUnalignedAccessMode
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:539
llvm::GCNSubtarget::hasScalarFlatScratchInsts
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:592
llvm::GCNSubtarget::getTargetLowering
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:224
llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:128
llvm::GCNSubtarget::hasSDWAOmod
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:674
llvm::AMDGPU::IsaInfo::getMaxNumSGPRs
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Definition: AMDGPUBaseInfo.cpp:920
llvm::GCNSubtarget::hasSMRDReadVALUDefHazard
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:440
llvm::GCNSubtarget::getOccupancyWithNumSGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
Definition: AMDGPUSubtarget.cpp:602
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::GCNSubtarget::getInlineAsmLowering
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:236
llvm::GCNSubtarget::HasArchitectedFlatScratch
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:172
b
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int b
Definition: README.txt:418
llvm::GCNSubtarget::hasR128A16
bool hasR128A16() const
Definition: GCNSubtarget.h:896
llvm::GCNSubtarget::getBaseMaxNumSGPRs
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
Definition: AMDGPUSubtarget.cpp:680
llvm::GCNSubtarget::hasVscnt
bool hasVscnt() const
Definition: GCNSubtarget.h:758
AMDGPUSubtarget.h
llvm::GCNSubtarget::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1287
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isXnackOnOrAny
bool isXnackOnOrAny() const
Definition: AMDGPUBaseInfo.h:126
llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition: SelectionDAGTargetInfo.h:31
llvm::GCNSubtarget::getMaxPrivateElementSize
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:290
llvm::GCNSubtarget::hasGFX940Insts
bool hasGFX940Insts() const
Definition: GCNSubtarget.h:1095
llvm::GCNSubtarget::hasVOP3Literal
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:774
llvm::GCNSubtarget::SupportsXNACK
bool SupportsXNACK
Definition: GCNSubtarget.h:78
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::GCNSubtarget::hasFFBH
bool hasFFBH() const
Definition: GCNSubtarget.h:367
llvm::GCNSubtarget::hasSMemTimeInst
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:766
llvm::GCNSubtarget::hasReadVCCZBug
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:429
llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1268
llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:948
llvm::GCNSubtarget::AutoWaitcntBeforeBarrier
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:73
llvm::GCNSubtarget::EnableTgSplit
bool EnableTgSplit
Definition: GCNSubtarget.h:84
llvm::GCNSubtarget::hasSMemRealTime
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:826
llvm::GCNSubtarget::hasMed3_16
bool hasMed3_16() const
Definition: GCNSubtarget.h:371
llvm::GCNSubtarget::getInstructionSelector
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:240
llvm::X86AS::FS
@ FS
Definition: X86.h:200
llvm::GCNSubtarget::isMesaGfxShader
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:666
llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:857
llvm::GCNSubtarget::hasGFX10_BEncoding
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:926
llvm::GCNSubtarget::HasVOPDInsts
bool HasVOPDInsts
Definition: GCNSubtarget.h:197
llvm::GCNSubtarget::TrapHandler
bool TrapHandler
Definition: GCNSubtarget.h:86
llvm::GCNSubtarget::hasPkFmacF16Inst
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:738
llvm::GCNSubtarget::TrapID
TrapID
Definition: GCNSubtarget.h:43
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:555
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::SIFrameLowering
Definition: SIFrameLowering.h:16
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
SIISelLowering.h
llvm::GCNSubtarget::hasFlatScratchSVSSwizzleBug
bool hasFlatScratchSVSSwizzleBug() const
Definition: GCNSubtarget.h:1086
llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1229
llvm::GCNSubtarget::hasDot6Insts
bool hasDot6Insts() const
Definition: GCNSubtarget.h:718
llvm::GCNSubtarget::hasFlatAtomicFaddF32Inst
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:752
llvm::GCNSubtarget::LDSBankCount
int LDSBankCount
Definition: GCNSubtarget.h:62
llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:459
llvm::GCNSubtarget::hasScalarAtomics
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:848
llvm::GCNSubtarget::hasUnalignedDSAccess
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:527
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:809
llvm::GCNSubtarget::hasTransForwardingHazard
bool hasTransForwardingHazard() const
Definition: GCNSubtarget.h:1028
llvm::GCNSubtarget::MIMG_R128
bool MIMG_R128
Definition: GCNSubtarget.h:99
llvm::GCNSubtarget::hasExtendedImageInsts
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:892
llvm::GCNSubtarget::hasDot1Insts
bool hasDot1Insts() const
Definition: GCNSubtarget.h:698
llvm::GCNSubtarget::hasOffset3fBug
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:908
llvm::GCNSubtarget::hasVcmpxExecWARHazard
bool hasVcmpxExecWARHazard() const
Definition: GCNSubtarget.h:1012
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:200
llvm::GCNSubtarget::getVGPREncodingGranule
unsigned getVGPREncodingGranule() const
Definition: GCNSubtarget.h:1207
llvm::GCNSubtarget::isWave64
bool isWave64() const
Definition: GCNSubtarget.h:1272
llvm::GCNSubtarget::HasLdsBranchVmemWARHazard
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:188
llvm::GCNSubtarget::EnableUnsafeDSOffsetFolding
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:90
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::GCNSubtarget::FMA
bool FMA
Definition: GCNSubtarget.h:98
llvm::GCNSubtarget::FullRate64Ops
bool FullRate64Ops
Definition: GCNSubtarget.h:69
llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:244
llvm::GCNSubtarget::AddNoCarryInsts
bool AddNoCarryInsts
Definition: GCNSubtarget.h:174
llvm::GCNSubtarget::hasNegativeUnalignedScratchOffsetBug
bool hasNegativeUnalignedScratchOffsetBug() const
Definition: GCNSubtarget.h:958
llvm::GCNSubtarget::usePRTStrictNull
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:505
llvm::GCNSubtarget::hasAtomicPkFaddNoRtnInsts
bool hasAtomicPkFaddNoRtnInsts() const
Definition: GCNSubtarget.h:750
llvm::GCNSubtarget::hasInstFwdPrefetchBug
bool hasInstFwdPrefetchBug() const
Definition: GCNSubtarget.h:1008
llvm::GCNSubtarget::FastDenormalF32
bool FastDenormalF32
Definition: GCNSubtarget.h:67
llvm::GCNSubtarget::HasPackedTID
bool HasPackedTID
Definition: GCNSubtarget.h:180
llvm::GCNSubtarget::GFX10_BEncoding
bool GFX10_BEncoding
Definition: GCNSubtarget.h:137
llvm::RegisterBankInfo
Holds all the information related to register banks.
Definition: RegisterBankInfo.h:39
llvm::InstructionSelector
Provides the logic to select generic machine instructions.
Definition: InstructionSelector.h:428
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
llvm::AMDGPU::IsaInfo::getMinNumSGPRs
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:903
llvm::GCNSubtarget::HasUnpackedD16VMem
bool HasUnpackedD16VMem
Definition: GCNSubtarget.h:175
llvm::GCNSubtarget::HasMADIntraFwdBug
bool HasMADIntraFwdBug
Definition: GCNSubtarget.h:196
llvm::GCNSubtarget::getSGPRAllocGranule
unsigned getSGPRAllocGranule() const
Definition: GCNSubtarget.h:1132
llvm::GCNSubtarget::hasImageGather4D16Bug
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:914
llvm::GCNSubtarget::hasPackedTID
bool hasPackedTID() const
Definition: GCNSubtarget.h:1091
llvm::GCNSubtarget::getTrapHandlerAbi
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:403
llvm::GCNSubtarget::HasFmaMixInsts
bool HasFmaMixInsts
Definition: GCNSubtarget.h:115
llvm::GCNSubtarget::HasAtomicPkFaddNoRtnInsts
bool HasAtomicPkFaddNoRtnInsts
Definition: GCNSubtarget.h:152
llvm::GCNSubtarget::has64BitDPP
bool has64BitDPP() const
Definition: GCNSubtarget.h:876
llvm::GCNSubtarget::GFX90AInsts
bool GFX90AInsts
Definition: GCNSubtarget.h:103
llvm::GCNSubtarget::HasIntClamp
bool HasIntClamp
Definition: GCNSubtarget.h:114
llvm::GCNSubtarget::hasNegativeScratchOffsetBug
bool hasNegativeScratchOffsetBug() const
Definition: GCNSubtarget.h:956
llvm::GCNSubtarget::hasVMEMtoScalarWriteHazard
bool hasVMEMtoScalarWriteHazard() const
Definition: GCNSubtarget.h:996
llvm::GCNSubtarget::NSAMaxSize
unsigned NSAMaxSize
Definition: GCNSubtarget.h:135
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:657
llvm::GCNSubtarget::hasVOP3DPP
bool hasVOP3DPP() const
Definition: GCNSubtarget.h:1056
llvm::GCNSubtarget::HasShaderCyclesRegister
bool HasShaderCyclesRegister
Definition: GCNSubtarget.h:164
llvm::GCNSubtarget::hasUnalignedBufferAccess
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:519
llvm::GCNSubtarget::ScalarizeGlobal
bool ScalarizeGlobal
Definition: GCNSubtarget.h:181
llvm::GCNSubtarget::LDSMisalignedBug
bool LDSMisalignedBug
Definition: GCNSubtarget.h:176
llvm::GCNSubtarget::getTotalNumVGPRs
unsigned getTotalNumVGPRs() const
Definition: GCNSubtarget.h:1212
llvm::GCNSubtarget::GCNSubtarget
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
Definition: AMDGPUSubtarget.cpp:163
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:499
llvm::GCNSubtarget::HasVMEMtoScalarWriteHazard
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:184
llvm::GCNSubtarget::hasLDSMisalignedBug
bool hasLDSMisalignedBug() const
Definition: GCNSubtarget.h:1004
llvm::GCNSubtarget::hasMFMAInlineLiteralBug
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:962
llvm::GCNSubtarget::HasImageGather4D16Bug
bool HasImageGather4D16Bug
Definition: GCNSubtarget.h:194
llvm::GCNSubtarget::FeatureDisable
bool FeatureDisable
Definition: GCNSubtarget.h:200
llvm::GCNSubtarget::EnableXNACK
bool EnableXNACK
Definition: GCNSubtarget.h:82
llvm::GCNSubtarget::hasFlat
bool hasFlat() const
Definition: GCNSubtarget.h:333
llvm::GCNSubtarget::hasImageStoreD16Bug
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:912
llvm::GCNSubtarget::hasGlobalAddTidInsts
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:601
llvm::GCNSubtarget::hasSDWAScalar
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:678
llvm::GCNSubtarget::HasAtomicFaddNoRtnInsts
bool HasAtomicFaddNoRtnInsts
Definition: GCNSubtarget.h:151
llvm::SIRegisterInfo::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: SIRegisterInfo.h:330
llvm::GCNSubtarget::hasFlatScratchSVSMode
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:590
llvm::GCNSubtarget::hasFmaakFmamkF32Insts
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:884
llvm::GCNSubtarget::hasAtomicFaddNoRtnInsts
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:748
llvm::GCNSubtarget::SupportsSRAMECC
bool SupportsSRAMECC
Definition: GCNSubtarget.h:154
llvm::GCNSubtarget::EnableCuMode
bool EnableCuMode
Definition: GCNSubtarget.h:85
llvm::GCNSubtarget::HasDot6Insts
bool HasDot6Insts
Definition: GCNSubtarget.h:144
llvm::GCNSubtarget::hasNullExportTarget
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
Definition: GCNSubtarget.h:1080
llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1223
llvm::GCNSubtarget::hasReadM0MovRelInterpHazard
bool hasReadM0MovRelInterpHazard() const
Definition: GCNSubtarget.h:975
llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:1147
llvm::GCNSubtarget::hasUnalignedScratchAccess
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:535
llvm::GCNSubtarget::hasDot7Insts
bool hasDot7Insts() const
Definition: GCNSubtarget.h:722
llvm::GCNSubtarget::TSInfo
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:202
llvm::GCNSubtarget::hasRFEHazards
bool hasRFEHazards() const
Definition: GCNSubtarget.h:450
llvm::GCNSubtarget::HasVcmpxExecWARHazard
bool HasVcmpxExecWARHazard
Definition: GCNSubtarget.h:187
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:1050
llvm::GCNSubtarget::getOccupancyWithNumVGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
Definition: AMDGPUSubtarget.cpp:628
llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:884
llvm::GCNSubtarget::hasNoDataDepHazard
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:778
llvm::GCNSubtarget::enableEarlyIfConversion
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:815
llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:891
llvm::GCNSubtarget::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1299
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:286
llvm::GCNSubtarget::MaxPrivateElementSize
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:63
llvm::GCNSubtarget::hasBFE
bool hasBFE() const
Definition: GCNSubtarget.h:347
llvm::GCNSubtarget::hasDOTOpSelHazard
bool hasDOTOpSelHazard() const
Definition: GCNSubtarget.h:1035
SIInstrInfo.h
llvm::GCNSubtarget::hasFMA
bool hasFMA() const
Definition: GCNSubtarget.h:387
llvm::GCNSubtarget::hasLdsBranchVmemWARHazard
bool hasLdsBranchVmemWARHazard() const
Definition: GCNSubtarget.h:1016
llvm::GCNSubtarget::HasPkFmacF16Inst
bool HasPkFmacF16Inst
Definition: GCNSubtarget.h:149
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::GCNSubtarget::HasFlatAtomicFaddF32Inst
bool HasFlatAtomicFaddF32Inst
Definition: GCNSubtarget.h:153
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:73
llvm::GCNSubtarget::hasIntClamp
bool hasIntClamp() const
Definition: GCNSubtarget.h:301
llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:609
llvm::GCNSubtarget::hasFPAtomicToDenormModeHazard
bool hasFPAtomicToDenormModeHazard() const
Definition: GCNSubtarget.h:1052
llvm::GCNSubtarget::hasDenormModeInst
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:473
llvm::GCNSubtarget::hasAtomicFaddInsts
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:742
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:571
llvm::GCNSubtarget::hasDot5Insts
bool hasDot5Insts() const
Definition: GCNSubtarget.h:714
llvm::GCNSubtarget::hasScalarStores
bool hasScalarStores() const
Definition: GCNSubtarget.h:844
llvm::GCNSubtarget::hasGFX10_3Insts
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:930
llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:992
llvm::GCNSubtarget::HasScalarStores
bool HasScalarStores
Definition: GCNSubtarget.h:118
llvm::GCNSubtarget::flatScratchIsPointer
bool flatScratchIsPointer() const
Definition: GCNSubtarget.h:1114
llvm::GCNSubtarget::supportsGetDoorbellID
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:407
llvm::GCNSubtarget::hasDPPBroadcasts
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:864
llvm::GCNSubtarget::hasMadF16
bool hasMadF16() const
Definition: AMDGPUSubtarget.cpp:592
SIFrameLowering.h
llvm::GCNSubtarget::GFX7GFX8GFX9Insts
bool GFX7GFX8GFX9Insts
Definition: GCNSubtarget.h:108
llvm::GCNSubtarget::HasSDWAOmod
bool HasSDWAOmod
Definition: GCNSubtarget.h:120
llvm::GCNSubtarget::GFX10_AEncoding
bool GFX10_AEncoding
Definition: GCNSubtarget.h:136
llvm::GCNSubtarget::hasVALUTransUseHazard
bool hasVALUTransUseHazard() const
Definition: GCNSubtarget.h:1064
llvm::GCNSubtarget::hasDPPWavefrontShifts
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:868
llvm::GCNSubtarget::NegativeUnalignedScratchOffsetBug
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:112
llvm::GCNSubtarget::SGPRInitBug
bool SGPRInitBug
Definition: GCNSubtarget.h:109
llvm::GCNSubtarget::HasVOP3Literal
bool HasVOP3Literal
Definition: GCNSubtarget.h:165
llvm::GCNSubtarget::hasDPP8
bool hasDPP8() const
Definition: GCNSubtarget.h:872
llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:861
llvm::GCNSubtarget::hasScalarPackInsts
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:395
llvm::GCNSubtarget::hasD16LoadStore
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:621
llvm::GCNSubtarget::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: GCNSubtarget.h:1276
llvm::GCNSubtarget::adjustSchedDependency
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
Definition: AMDGPUSubtarget.cpp:802
llvm::GCNSubtarget::hasDot4Insts
bool hasDot4Insts() const
Definition: GCNSubtarget.h:710
llvm::GCNSubtarget::hasNoSdstCMPX
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:754
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:62
llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1153
llvm::GCNSubtarget::hasInstPrefetch
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:786
llvm::GCNSubtarget::getPostRAMutations
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation >> &Mutations) const override
Definition: AMDGPUSubtarget.cpp:946
llvm::GCNSubtarget::hasFlatScratchSTMode
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:586
llvm::GCNSubtarget::hasFlatAddressSpace
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:563
llvm::GCNSubtarget::supportsBackOffBarrier
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:515
llvm::GCNSubtarget::FlatForGlobal
bool FlatForGlobal
Definition: GCNSubtarget.h:72
llvm::GCNSubtarget::HasNSAEncoding
bool HasNSAEncoding
Definition: GCNSubtarget.h:134
llvm::GCNSubtarget::getReservedNumSGPRs
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
Definition: AMDGPUSubtarget.cpp:654
llvm::GCNSubtarget::GFX10Insts
bool GFX10Insts
Definition: GCNSubtarget.h:105
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::GCNSubtarget::hasAtomicFaddRtnInsts
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:746
llvm::GCNSubtarget::getVGPRAllocGranule
unsigned getVGPRAllocGranule() const
Definition: GCNSubtarget.h:1202
llvm::GCNSubtarget::hasNSAtoVMEMBug
bool hasNSAtoVMEMBug() const
Definition: GCNSubtarget.h:1042
llvm::GCNSubtarget::HasMAIInsts
bool HasMAIInsts
Definition: GCNSubtarget.h:147
llvm::AMDGPUSubtarget::INVALID
@ INVALID
Definition: AMDGPUSubtarget.h:32
llvm::GCNSubtarget::enableSubRegLiveness
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:804
llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
AMDGPUGenSubtargetInfo
llvm::GCNSubtarget::setScalarizeGlobalBehavior
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:808
llvm::GCNSubtarget::GFX9Insts
bool GFX9Insts
Definition: GCNSubtarget.h:102
llvm::GCNSubtarget::HasDot1Insts
bool HasDot1Insts
Definition: GCNSubtarget.h:139
llvm::GCNSubtarget::HasDPP
bool HasDPP
Definition: GCNSubtarget.h:125
llvm::GCNSubtarget::hasUserSGPRInit16Bug
bool hasUserSGPRInit16Bug() const
Definition: GCNSubtarget.h:952
llvm::GCNSubtarget::hasFullRate64Ops
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:325
llvm::GCNSubtarget::HasNoSdstCMPX
bool HasNoSdstCMPX
Definition: GCNSubtarget.h:160
llvm::GCNSubtarget::hasDot8Insts
bool hasDot8Insts() const
Definition: GCNSubtarget.h:726
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::GCNSubtarget::hasUnpackedD16VMem
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:661
llvm::GCNSubtarget::flatScratchIsArchitected
bool flatScratchIsArchitected() const
Definition: GCNSubtarget.h:1120
llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:252
llvm::GCNSubtarget::EnableSRAMECC
bool EnableSRAMECC
Definition: GCNSubtarget.h:158
llvm::GCNSubtarget::getMinWavesPerEU
unsigned getMinWavesPerEU() const override
Definition: GCNSubtarget.h:1305
llvm::GCNSubtarget::HasDot5Insts
bool HasDot5Insts
Definition: GCNSubtarget.h:143
llvm::GCNSubtarget::useAA
bool useAA() const override
Definition: AMDGPUSubtarget.cpp:600
llvm::GCNSubtarget::hasDPP
bool hasDPP() const
Definition: GCNSubtarget.h:860
llvm::GCNSubtarget::HasInstFwdPrefetchBug
bool HasInstFwdPrefetchBug
Definition: GCNSubtarget.h:186
llvm::SITargetLowering
Definition: SIISelLowering.h:31
llvm::GCNSubtarget::HasSMEMtoVectorWriteHazard
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:185
llvm::GCNSubtarget::hasG16
bool hasG16() const
Definition: GCNSubtarget.h:906
llvm::GCNSubtarget::hasHardClauses
bool hasHardClauses() const
Definition: GCNSubtarget.h:1048
llvm::GCNSubtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:359
llvm::GCNSubtarget::HasFP8Insts
bool HasFP8Insts
Definition: GCNSubtarget.h:148
llvm::GCNSubtarget::haveRoundOpsF64
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:493
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:635
llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:822
llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:1015
llvm::GCNSubtarget::HasDPP8
bool HasDPP8
Definition: GCNSubtarget.h:126
llvm::GCNSubtarget::zeroesHigh16BitsOfDest
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
Definition: AMDGPUSubtarget.cpp:208
llvm::GCNSubtarget::EnableDS128
bool EnableDS128
Definition: GCNSubtarget.h:92
llvm::GCNSubtarget::HasMFMAInlineLiteralBug
bool HasMFMAInlineLiteralBug
Definition: GCNSubtarget.h:177
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:220
llvm::GCNSubtarget::hasOnlyRevVALUShifts
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:339
llvm::GCNSubtarget::UnalignedScratchAccess
bool UnalignedScratchAccess
Definition: GCNSubtarget.h:75
llvm::GCNSubtarget::TrapID::LLVMAMDHSATrap
@ LLVMAMDHSATrap
llvm::GCNSubtarget::DumpCode
bool DumpCode
Definition: GCNSubtarget.h:94
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:267
llvm::GCNSubtarget::TrapHandlerAbi::AMDHSA
@ AMDHSA
llvm::GCNSubtarget::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs() const
Definition: GCNSubtarget.h:1217
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
llvm::GCNSubtarget::hasLegacyGeometry
bool hasLegacyGeometry() const
Definition: GCNSubtarget.h:1129
llvm::GCNSubtarget::enableFlatScratch
bool enableFlatScratch() const
Definition: GCNSubtarget.h:596
llvm::GCNSubtarget::hasSwap
bool hasSwap() const
Definition: GCNSubtarget.h:391
llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:551
llvm::GCNSubtarget::FlatScratchInsts
bool FlatScratchInsts
Definition: GCNSubtarget.h:170
AMDGPUCallLowering.h
llvm::GCNSubtarget::hasMADIntraFwdBug
bool hasMADIntraFwdBug() const
Definition: GCNSubtarget.h:916
llvm::GCNSubtarget::HasMovrel
bool HasMovrel
Definition: GCNSubtarget.h:116
llvm::GCNSubtarget::HasDot7Insts
bool HasDot7Insts
Definition: GCNSubtarget.h:145
SelectionDAGTargetInfo.h
llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:559
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:184
llvm::GCNSubtarget::hasMergedShaders
bool hasMergedShaders() const
Definition: GCNSubtarget.h:1124
llvm::GCNSubtarget::FP64
bool FP64
Definition: GCNSubtarget.h:97
llvm::GCNSubtarget::getBaseReservedNumSGPRs
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
Definition: AMDGPUSubtarget.cpp:638
llvm::GCNSubtarget::getNSAThreshold
unsigned getNSAThreshold(const MachineFunction &MF) const
Definition: AMDGPUSubtarget.cpp:957
llvm::GCNSubtarget::hasFFBL
bool hasFFBL() const
Definition: GCNSubtarget.h:363
llvm::GCNSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:257
llvm::GCNSubtarget::hasApertureRegs
bool hasApertureRegs() const
Definition: GCNSubtarget.h:543
llvm::GCNSubtarget::~GCNSubtarget
~GCNSubtarget() override
llvm::GCNSubtarget::getSGPREncodingGranule
unsigned getSGPREncodingGranule() const
Definition: GCNSubtarget.h:1137
llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:851
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:625
llvm::GCNSubtarget::enableSIScheduler
bool enableSIScheduler() const
Definition: GCNSubtarget.h:940
llvm::AMDGPU::IsaInfo::getMinNumVGPRs
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:1021
llvm::GCNSubtarget::getInstrItineraryData
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:261
llvm::GCNSubtarget::CIInsts
bool CIInsts
Definition: GCNSubtarget.h:100
llvm::GCNSubtarget::HasVscnt
bool HasVscnt
Definition: GCNSubtarget.h:161
llvm::GCNSubtarget::FastFMAF32
bool FastFMAF32
Definition: GCNSubtarget.h:66
llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:782
llvm::GCNSubtarget::hasDelayAlu
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
Definition: GCNSubtarget.h:1089
llvm::GCNSubtarget::hasReadM0SendMsgHazard
bool hasReadM0SendMsgHazard() const
Definition: GCNSubtarget.h:979
llvm::GCNSubtarget::Has64BitDPP
bool Has64BitDPP
Definition: GCNSubtarget.h:127
llvm::GCNSubtarget::hasMIMG_R128
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:309
llvm::LegalizerInfo
Definition: LegalizerInfo.h:1182
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumVGPRs
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
Definition: AMDGPUMetadata.h:260
llvm::GCNSubtarget::EnableFlatScratch
bool EnableFlatScratch
Definition: GCNSubtarget.h:173
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::GCNSubtarget::hasSMEMtoVectorWriteHazard
bool hasSMEMtoVectorWriteHazard() const
Definition: GCNSubtarget.h:1000
llvm::GCNSubtarget::HasApertureRegs
bool HasApertureRegs
Definition: GCNSubtarget.h:77
llvm::GCNSubtarget::hasMin3Max3_16
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:375
llvm::GCNSubtarget::hasVALUMaskWriteHazard
bool hasVALUMaskWriteHazard() const
Definition: GCNSubtarget.h:1066
llvm::GCNSubtarget::HasSMemTimeInst
bool HasSMemTimeInst
Definition: GCNSubtarget.h:163
llvm::AMDGPUSubtarget::Generation
Generation
Definition: AMDGPUSubtarget.h:31
llvm::GCNSubtarget::createFillMFMAShadowMutation
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
Definition: AMDGPUSubtarget.cpp:952
llvm::GCNSubtarget::HasAtomicFaddRtnInsts
bool HasAtomicFaddRtnInsts
Definition: GCNSubtarget.h:150
llvm::MachineSchedPolicy
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Definition: MachineScheduler.h:181
llvm::GCNSubtarget::TargetTriple
Triple TargetTriple
Definition: GCNSubtarget.h:58
llvm::GCNSubtarget::HasGFX11FullVGPRs
bool HasGFX11FullVGPRs
Definition: GCNSubtarget.h:195
llvm::GCNSubtarget::HasSDWAMac
bool HasSDWAMac
Definition: GCNSubtarget.h:123
llvm::GCNSubtarget::hasHWFP64
bool hasHWFP64() const
Definition: GCNSubtarget.h:313
llvm::GCNSubtarget::getTotalNumSGPRs
unsigned getTotalNumSGPRs() const
Definition: GCNSubtarget.h:1142
llvm::GCNSubtarget::has12DWordStoreHazard
bool has12DWordStoreHazard() const
Definition: GCNSubtarget.h:966
llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition: AMDGPUSubtarget.h:204
llvm::GCNSubtarget::hasVcmpxPermlaneHazard
bool hasVcmpxPermlaneHazard() const
Definition: GCNSubtarget.h:992
llvm::GCNSubtarget::hasSDWAOutModsVOPC
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:690
llvm::CallLowering
Definition: CallLowering.h:44
llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:667
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:414
llvm::GCNSubtarget::hasMad64_32
bool hasMad64_32() const
Definition: GCNSubtarget.h:670
llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:974
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::GCNSubtarget::getBaseMaxNumVGPRs
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
Definition: AMDGPUSubtarget.cpp:762
llvm::GCNSubtarget::FlatInstOffsets
bool FlatInstOffsets
Definition: GCNSubtarget.h:168
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:730
llvm::GCNSubtarget::HasG16
bool HasG16
Definition: GCNSubtarget.h:133
llvm::GCNSubtarget::hasUsableDivScaleConditionOutput
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:423
llvm::GCNSubtarget::hasLshlAddB64
bool hasLshlAddB64() const
Definition: GCNSubtarget.h:938
llvm::GCNSubtarget::hasAtomicCSub
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:605
llvm::GCNSubtarget::HasGFX10A16
bool HasGFX10A16
Definition: GCNSubtarget.h:132
llvm::GCNSubtarget::HasR128A16
bool HasR128A16
Definition: GCNSubtarget.h:131
llvm::GCNSubtarget::getCallLowering
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:232
llvm::GCNSubtarget::hasReadM0LdsDirectHazard
bool hasReadM0LdsDirectHazard() const
Definition: GCNSubtarget.h:988