LLVM  15.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMD GCN specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16 
17 #include "AMDGPUCallLowering.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIFrameLowering.h"
20 #include "SIISelLowering.h"
21 #include "SIInstrInfo.h"
23 
24 #define GET_SUBTARGETINFO_HEADER
25 #include "AMDGPUGenSubtargetInfo.inc"
26 
27 namespace llvm {
28 
29 class GCNTargetMachine;
30 
31 class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
32  public AMDGPUSubtarget {
33 
35 
36 public:
37  // The following two enums are documented at:
38  // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
// TrapHandlerAbi enumerates the trap handler ABIs: NONE (0x00) or AMDHSA (0x01).
39  enum class TrapHandlerAbi {
40  NONE = 0x00,
41  AMDHSA = 0x01,
42  };
43 
// Trap identifiers, documented in the trap-handler-abi section of
// https://llvm.org/docs/AMDGPUUsage.html.
44  enum class TrapID {
45  LLVMAMDHSATrap = 0x02,
46  LLVMAMDHSADebugTrap = 0x03,
47  };
48 
49 private:
50  /// GlobalISel related APIs.
51  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
52  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
53  std::unique_ptr<InstructionSelector> InstSelector;
54  std::unique_ptr<LegalizerInfo> Legalizer;
55  std::unique_ptr<RegisterBankInfo> RegBankInfo;
56 
57 protected:
58  // Basic subtarget description.
61  unsigned Gen = INVALID;
63  int LDSBankCount = 0;
64  unsigned MaxPrivateElementSize = 0;
65 
66  // Possibly statically set by tablegen, but may want to be overridden.
67  bool FastFMAF32 = false;
68  bool FastDenormalF32 = false;
69  bool HalfRate64Ops = false;
70  bool FullRate64Ops = false;
71 
72  // Dynamically set bits that enable features.
73  bool FlatForGlobal = false;
75  bool UnalignedScratchAccess = false;
76  bool UnalignedAccessMode = false;
77  bool HasApertureRegs = false;
78  bool SupportsXNACK = false;
79 
80  // This should not be used directly. 'TargetID' tracks the dynamic settings
81  // for XNACK.
82  bool EnableXNACK = false;
83 
84  bool EnableTgSplit = false;
85  bool EnableCuMode = false;
86  bool TrapHandler = false;
87 
88  // Used as options.
89  bool EnableLoadStoreOpt = false;
91  bool EnableSIScheduler = false;
92  bool EnableDS128 = false;
93  bool EnablePRTStrictNull = false;
94  bool DumpCode = false;
95 
96  // Subtarget properties statically set by tablegen.
97  bool FP64 = false;
98  bool FMA = false;
99  bool MIMG_R128 = false;
100  bool CIInsts = false;
101  bool GFX8Insts = false;
102  bool GFX9Insts = false;
103  bool GFX90AInsts = false;
104  bool GFX940Insts = false;
105  bool GFX10Insts = false;
106  bool GFX11Insts = false;
107  bool GFX10_3Insts = false;
108  bool GFX7GFX8GFX9Insts = false;
109  bool SGPRInitBug = false;
112  bool HasSMemRealTime = false;
113  bool HasIntClamp = false;
114  bool HasFmaMixInsts = false;
115  bool HasMovrel = false;
116  bool HasVGPRIndexMode = false;
117  bool HasScalarStores = false;
118  bool HasScalarAtomics = false;
119  bool HasSDWAOmod = false;
120  bool HasSDWAScalar = false;
121  bool HasSDWASdst = false;
122  bool HasSDWAMac = false;
123  bool HasSDWAOutModsVOPC = false;
124  bool HasDPP = false;
125  bool HasDPP8 = false;
126  bool Has64BitDPP = false;
127  bool HasPackedFP32Ops = false;
128  bool HasImageInsts = false;
129  bool HasExtendedImageInsts = false;
130  bool HasR128A16 = false;
131  bool HasGFX10A16 = false;
132  bool HasG16 = false;
133  bool HasNSAEncoding = false;
134  unsigned NSAMaxSize = 0;
135  bool GFX10_AEncoding = false;
136  bool GFX10_BEncoding = false;
137  bool HasDLInsts = false;
138  bool HasDot1Insts = false;
139  bool HasDot2Insts = false;
140  bool HasDot3Insts = false;
141  bool HasDot4Insts = false;
142  bool HasDot5Insts = false;
143  bool HasDot6Insts = false;
144  bool HasDot7Insts = false;
145  bool HasDot8Insts = false;
146  bool HasMAIInsts = false;
147  bool HasPkFmacF16Inst = false;
148  bool HasAtomicFaddRtnInsts = false;
151  bool SupportsSRAMECC = false;
152 
153  // This should not be used directly. 'TargetID' tracks the dynamic settings
154  // for SRAMECC.
155  bool EnableSRAMECC = false;
156 
157  bool HasNoSdstCMPX = false;
158  bool HasVscnt = false;
159  bool HasGetWaveIdInst = false;
160  bool HasSMemTimeInst = false;
162  bool HasVOP3Literal = false;
163  bool HasNoDataDepHazard = false;
164  bool FlatAddressSpace = false;
165  bool FlatInstOffsets = false;
166  bool FlatGlobalInsts = false;
167  bool FlatScratchInsts = false;
170  bool EnableFlatScratch = false;
171  bool AddNoCarryInsts = false;
172  bool HasUnpackedD16VMem = false;
173  bool LDSMisalignedBug = false;
175  bool UnalignedBufferAccess = false;
176  bool UnalignedDSAccess = false;
177  bool HasPackedTID = false;
178  bool ScalarizeGlobal = false;
179 
183  bool HasInstFwdPrefetchBug = false;
184  bool HasVcmpxExecWARHazard = false;
186  bool HasNSAtoVMEMBug = false;
187  bool HasNSAClauseBug = false;
188  bool HasOffset3fBug = false;
190  bool HasImageStoreD16Bug = false;
191  bool HasImageGather4D16Bug = false;
192  bool HasVOPDInsts = false;
193 
194  // Dummy feature to use for assembler in tablegen.
195  bool FeatureDisable = false;
196 
198 private:
199  SIInstrInfo InstrInfo;
200  SITargetLowering TLInfo;
201  SIFrameLowering FrameLowering;
202 
203 public:
204  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
// The value is the largest 13-bit field value, (1 << 13) - 1, scaled by
// 256 * 4 (presumably 256 dwords of 4 bytes each, i.e. a size in bytes).
205  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
206 
207  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
208  const GCNTargetMachine &TM);
209  ~GCNSubtarget() override;
210 
212  StringRef GPU, StringRef FS);
213 
214  const SIInstrInfo *getInstrInfo() const override {
215  return &InstrInfo;
216  }
217 
218  const SIFrameLowering *getFrameLowering() const override {
219  return &FrameLowering;
220  }
221 
222  const SITargetLowering *getTargetLowering() const override {
223  return &TLInfo;
224  }
225 
226  const SIRegisterInfo *getRegisterInfo() const override {
227  return &InstrInfo.getRegisterInfo();
228  }
229 
230  const CallLowering *getCallLowering() const override {
231  return CallLoweringInfo.get();
232  }
233 
234  const InlineAsmLowering *getInlineAsmLowering() const override {
235  return InlineAsmLoweringInfo.get();
236  }
237 
239  return InstSelector.get();
240  }
241 
242  const LegalizerInfo *getLegalizerInfo() const override {
243  return Legalizer.get();
244  }
245 
246  const RegisterBankInfo *getRegBankInfo() const override {
247  return RegBankInfo.get();
248  }
249 
251  return TargetID;
252  }
253 
254  // Nothing implemented, just prevent crashes on use.
255  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
256  return &TSInfo;
257  }
258 
259  const InstrItineraryData *getInstrItineraryData() const override {
260  return &InstrItins;
261  }
262 
264 
266  return (Generation)Gen;
267  }
268 
269  /// Return the number of high bits known to be zero for a frame index.
272  }
273 
274  int getLDSBankCount() const {
275  return LDSBankCount;
276  }
277 
/// \returns the maximum private element size. MaxPrivateElementSize is used
/// when \p ForBufferRSrc is true or flat scratch is not enabled; otherwise
/// the result is the constant 16.
278  unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
279  return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
280  }
281 
282  unsigned getConstantBusLimit(unsigned Opcode) const;
283 
284  /// \returns true if an instruction that produces a 16-bit result in a
285  /// 32-bit register implicitly zeroes the high 16 bits, rather than
286  /// preserving the original value.
287  bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
288 
289  bool hasIntClamp() const {
290  return HasIntClamp;
291  }
292 
293  bool hasFP64() const {
294  return FP64;
295  }
296 
297  bool hasMIMG_R128() const {
298  return MIMG_R128;
299  }
300 
/// \returns the FP64 feature flag; this reads the same member as hasFP64().
301  bool hasHWFP64() const {
302  return FP64;
303  }
304 
305  bool hasFastFMAF32() const {
306  return FastFMAF32;
307  }
308 
309  bool hasHalfRate64Ops() const {
310  return HalfRate64Ops;
311  }
312 
313  bool hasFullRate64Ops() const {
314  return FullRate64Ops;
315  }
316 
317  bool hasAddr64() const {
319  }
320 
321  bool hasFlat() const {
323  }
324 
325  // Return true if the target only has the reverse operand versions of VALU
326  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
327  bool hasOnlyRevVALUShifts() const {
328  return getGeneration() >= VOLCANIC_ISLANDS;
329  }
330 
331  bool hasFractBug() const {
332  return getGeneration() == SOUTHERN_ISLANDS;
333  }
334 
335  bool hasBFE() const {
336  return true;
337  }
338 
339  bool hasBFI() const {
340  return true;
341  }
342 
343  bool hasBFM() const {
344  return hasBFE();
345  }
346 
/// \returns true unconditionally; \p Size is not consulted.
347  bool hasBCNT(unsigned Size) const {
348  return true;
349  }
350 
351  bool hasFFBL() const {
352  return true;
353  }
354 
355  bool hasFFBH() const {
356  return true;
357  }
358 
359  bool hasMed3_16() const {
361  }
362 
363  bool hasMin3Max3_16() const {
365  }
366 
367  bool hasFmaMixInsts() const {
368  return HasFmaMixInsts;
369  }
370 
371  bool hasCARRY() const {
372  return true;
373  }
374 
375  bool hasFMA() const {
376  return FMA;
377  }
378 
379  bool hasSwap() const {
380  return GFX9Insts;
381  }
382 
383  bool hasScalarPackInsts() const {
384  return GFX9Insts;
385  }
386 
387  bool hasScalarMulHiInsts() const {
388  return GFX9Insts;
389  }
390 
393  }
394 
395  bool supportsGetDoorbellID() const {
396  // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
397  return getGeneration() >= GFX9;
398  }
399 
400  /// True if the offset field of DS instructions works as expected. On SI, the
401  /// offset uses a 16-bit adder and does not always wrap properly.
402  bool hasUsableDSOffset() const {
403  return getGeneration() >= SEA_ISLANDS;
404  }
405 
408  }
409 
410  /// Condition output from div_scale is usable.
412  return getGeneration() != SOUTHERN_ISLANDS;
413  }
414 
415  /// Extra wait hazard is needed in some cases before
416  /// s_cbranch_vccnz/s_cbranch_vccz.
417  bool hasReadVCCZBug() const {
418  return getGeneration() <= SEA_ISLANDS;
419  }
420 
421  /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
423  return getGeneration() >= GFX10;
424  }
425 
426  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
427  /// was written by a VALU instruction.
429  return getGeneration() == SOUTHERN_ISLANDS;
430  }
431 
432  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
433  /// SGPR was written by a VALU Instruction.
435  return getGeneration() >= VOLCANIC_ISLANDS;
436  }
437 
438  bool hasRFEHazards() const {
439  return getGeneration() >= VOLCANIC_ISLANDS;
440  }
441 
442  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
443  unsigned getSetRegWaitStates() const {
444  return getGeneration() <= SEA_ISLANDS ? 1 : 2;
445  }
446 
447  bool dumpCode() const {
448  return DumpCode;
449  }
450 
451  /// Return the amount of LDS that can be used that will not restrict the
452  /// occupancy lower than WaveCount.
453  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
454  const Function &) const;
455 
458  }
459 
460  /// \returns If target supports S_DENORM_MODE.
461  bool hasDenormModeInst() const {
463  }
464 
465  bool useFlatForGlobal() const {
466  return FlatForGlobal;
467  }
468 
469  /// \returns If target supports ds_read/write_b128 and user enables generation
470  /// of ds_read/write_b128.
471  bool useDS128() const {
472  return CIInsts && EnableDS128;
473  }
474 
475  /// \return If target supports ds_read/write_b96/128.
476  bool hasDS96AndDS128() const {
477  return CIInsts;
478  }
479 
480  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
481  bool haveRoundOpsF64() const {
482  return CIInsts;
483  }
484 
485  /// \returns If MUBUF instructions always perform range checking, even for
486  /// buffer resources used for private memory access.
489  }
490 
491  /// \returns If target requires PRT Strict NULL support (zero result registers
492  /// for sparse texture support).
493  bool usePRTStrictNull() const {
494  return EnablePRTStrictNull;
495  }
496 
499  }
500 
502  return UnalignedBufferAccess;
503  }
504 
507  }
508 
509  bool hasUnalignedDSAccess() const {
510  return UnalignedDSAccess;
511  }
512 
515  }
516 
518  return UnalignedScratchAccess;
519  }
520 
521  bool hasUnalignedAccessMode() const {
522  return UnalignedAccessMode;
523  }
524 
525  bool hasApertureRegs() const {
526  return HasApertureRegs;
527  }
528 
529  bool isTrapHandlerEnabled() const {
530  return TrapHandler;
531  }
532 
/// \returns the XNACK state as reported by TargetID.isXnackOnOrAny(). The
/// EnableXNACK member is deliberately not read here; TargetID tracks the
/// dynamic XNACK setting.
533  bool isXNACKEnabled() const {
534  return TargetID.isXnackOnOrAny();
535  }
536 
537  bool isTgSplitEnabled() const {
538  return EnableTgSplit;
539  }
540 
541  bool isCuModeEnabled() const {
542  return EnableCuMode;
543  }
544 
545  bool hasFlatAddressSpace() const {
546  return FlatAddressSpace;
547  }
548 
/// \returns the same answer as hasFlatAddressSpace(); the two queries are
/// currently tied together.
549  bool hasFlatScrRegister() const {
550  return hasFlatAddressSpace();
551  }
552 
553  bool hasFlatInstOffsets() const {
554  return FlatInstOffsets;
555  }
556 
557  bool hasFlatGlobalInsts() const {
558  return FlatGlobalInsts;
559  }
560 
561  bool hasFlatScratchInsts() const {
562  return FlatScratchInsts;
563  }
564 
565  // Check if target supports ST addressing mode with FLAT scratch instructions.
566  // The ST addressing mode means no registers are used, either VGPR or SGPR,
567  // but only immediate offset is swizzled and added to the FLAT scratch base.
568  bool hasFlatScratchSTMode() const {
570  }
571 
572  bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
573 
575  return ScalarFlatScratchInsts;
576  }
577 
578  bool enableFlatScratch() const {
579  return flatScratchIsArchitected() ||
581  }
582 
583  bool hasGlobalAddTidInsts() const {
584  return GFX10_BEncoding;
585  }
586 
587  bool hasAtomicCSub() const {
588  return GFX10_BEncoding;
589  }
590 
592  return getGeneration() >= GFX9;
593  }
594 
595  bool hasFlatSegmentOffsetBug() const {
597  }
598 
600  return getGeneration() > GFX9;
601  }
602 
603  bool hasD16LoadStore() const {
604  return getGeneration() >= GFX9;
605  }
606 
607  bool d16PreservesUnusedBits() const {
609  }
610 
611  bool hasD16Images() const {
612  return getGeneration() >= VOLCANIC_ISLANDS;
613  }
614 
615  /// Return if most LDS instructions have an m0 use that require m0 to be
616  /// initialized.
617  bool ldsRequiresM0Init() const {
618  return getGeneration() < GFX9;
619  }
620 
621  // True if the hardware rewinds and replays GWS operations if a wave is
622  // preempted.
623  //
624  // If this is false, a GWS operation requires testing if a nack set the
625  // MEM_VIOL bit, and repeating if so.
626  bool hasGWSAutoReplay() const {
627  return getGeneration() >= GFX9;
628  }
629 
630  /// \returns if target has ds_gws_sema_release_all instruction.
631  bool hasGWSSemaReleaseAll() const {
632  return CIInsts;
633  }
634 
635  /// \returns true if the target has integer add/sub instructions that do not
636  /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
637  /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
638  /// for saturation.
639  bool hasAddNoCarry() const {
640  return AddNoCarryInsts;
641  }
642 
643  bool hasUnpackedD16VMem() const {
644  return HasUnpackedD16VMem;
645  }
646 
647  // Covers VS/PS/CS graphics shaders
648  bool isMesaGfxShader(const Function &F) const {
649  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
650  }
651 
652  bool hasMad64_32() const {
653  return getGeneration() >= SEA_ISLANDS;
654  }
655 
656  bool hasSDWAOmod() const {
657  return HasSDWAOmod;
658  }
659 
660  bool hasSDWAScalar() const {
661  return HasSDWAScalar;
662  }
663 
664  bool hasSDWASdst() const {
665  return HasSDWASdst;
666  }
667 
668  bool hasSDWAMac() const {
669  return HasSDWAMac;
670  }
671 
672  bool hasSDWAOutModsVOPC() const {
673  return HasSDWAOutModsVOPC;
674  }
675 
676  bool hasDLInsts() const {
677  return HasDLInsts;
678  }
679 
680  bool hasDot1Insts() const {
681  return HasDot1Insts;
682  }
683 
684  bool hasDot2Insts() const {
685  return HasDot2Insts;
686  }
687 
688  bool hasDot3Insts() const {
689  return HasDot3Insts;
690  }
691 
692  bool hasDot4Insts() const {
693  return HasDot4Insts;
694  }
695 
696  bool hasDot5Insts() const {
697  return HasDot5Insts;
698  }
699 
700  bool hasDot6Insts() const {
701  return HasDot6Insts;
702  }
703 
704  bool hasDot7Insts() const {
705  return HasDot7Insts;
706  }
707 
708  bool hasDot8Insts() const {
709  return HasDot8Insts;
710  }
711 
712  bool hasMAIInsts() const {
713  return HasMAIInsts;
714  }
715 
716  bool hasPkFmacF16Inst() const {
717  return HasPkFmacF16Inst;
718  }
719 
720  bool hasAtomicFaddInsts() const {
722  }
723 
725 
727 
729 
730  bool hasNoSdstCMPX() const {
731  return HasNoSdstCMPX;
732  }
733 
734  bool hasVscnt() const {
735  return HasVscnt;
736  }
737 
738  bool hasGetWaveIdInst() const {
739  return HasGetWaveIdInst;
740  }
741 
742  bool hasSMemTimeInst() const {
743  return HasSMemTimeInst;
744  }
745 
746  bool hasShaderCyclesRegister() const {
748  }
749 
750  bool hasVOP3Literal() const {
751  return HasVOP3Literal;
752  }
753 
754  bool hasNoDataDepHazard() const {
755  return HasNoDataDepHazard;
756  }
757 
759  return getGeneration() < SEA_ISLANDS;
760  }
761 
762  // Scratch is allocated in 256 dword per wave blocks for the entire
763  // wavefront. When viewed from the perspective of an arbitrary workitem, this
764  // is 4-byte aligned.
765  //
766  // Only 4-byte alignment is really needed to access anything. Transformations
767  // on the pointer value itself may rely on the alignment / known low bits of
768  // the pointer. Set this to something above the minimum to avoid needing
769  // dynamic realignment in common cases.
770  Align getStackAlignment() const { return Align(16); }
771 
772  bool enableMachineScheduler() const override {
773  return true;
774  }
775 
776  bool useAA() const override;
777 
778  bool enableSubRegLiveness() const override {
779  return true;
780  }
781 
784 
785  // static wrappers
786  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
787 
788  // XXX - Why is this here if it isn't in the default pass set?
789  bool enableEarlyIfConversion() const override {
790  return true;
791  }
792 
794  unsigned NumRegionInstrs) const override;
795 
796  unsigned getMaxNumUserSGPRs() const {
797  return 16;
798  }
799 
800  bool hasSMemRealTime() const {
801  return HasSMemRealTime;
802  }
803 
804  bool hasMovrel() const {
805  return HasMovrel;
806  }
807 
808  bool hasVGPRIndexMode() const {
809  return HasVGPRIndexMode;
810  }
811 
812  bool useVGPRIndexMode() const;
813 
814  bool hasScalarCompareEq64() const {
815  return getGeneration() >= VOLCANIC_ISLANDS;
816  }
817 
818  bool hasScalarStores() const {
819  return HasScalarStores;
820  }
821 
822  bool hasScalarAtomics() const {
823  return HasScalarAtomics;
824  }
825 
826  bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
827 
828  /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
829  bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
830 
831  /// \returns true if the subtarget has the v_permlane64_b32 instruction.
832  bool hasPermLane64() const { return getGeneration() >= GFX11; }
833 
834  bool hasDPP() const {
835  return HasDPP;
836  }
837 
/// \returns true when the DPP feature is present and the generation is
/// earlier than GFX10.
838  bool hasDPPBroadcasts() const {
839  return HasDPP && getGeneration() < GFX10;
840  }
841 
/// \returns true when the DPP feature is present and the generation is
/// earlier than GFX10.
842  bool hasDPPWavefrontShifts() const {
843  return HasDPP && getGeneration() < GFX10;
844  }
845 
846  bool hasDPP8() const {
847  return HasDPP8;
848  }
849 
850  bool has64BitDPP() const {
851  return Has64BitDPP;
852  }
853 
854  bool hasPackedFP32Ops() const {
855  return HasPackedFP32Ops;
856  }
857 
/// \returns true for GFX10 and later generations, or whenever the GFX940
/// instruction set is available.
858  bool hasFmaakFmamkF32Insts() const {
859  return getGeneration() >= GFX10 || hasGFX940Insts();
860  }
861 
862  bool hasImageInsts() const {
863  return HasImageInsts;
864  }
865 
866  bool hasExtendedImageInsts() const {
867  return HasExtendedImageInsts;
868  }
869 
870  bool hasR128A16() const {
871  return HasR128A16;
872  }
873 
874  bool hasGFX10A16() const {
875  return HasGFX10A16;
876  }
877 
/// \returns true if either A16 variant (R128A16 or GFX10A16) is available.
878  bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
879 
880  bool hasG16() const { return HasG16; }
881 
882  bool hasOffset3fBug() const {
883  return HasOffset3fBug;
884  }
885 
886  bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
887 
889 
890  bool hasNSAEncoding() const { return HasNSAEncoding; }
891 
892  unsigned getNSAMaxSize() const { return NSAMaxSize; }
893 
894  bool hasGFX10_AEncoding() const {
895  return GFX10_AEncoding;
896  }
897 
898  bool hasGFX10_BEncoding() const {
899  return GFX10_BEncoding;
900  }
901 
902  bool hasGFX10_3Insts() const {
903  return GFX10_3Insts;
904  }
905 
906  bool hasMadF16() const;
907 
908  bool hasMovB64() const { return GFX940Insts; }
909 
910  bool hasLshlAddB64() const { return GFX940Insts; }
911 
912  bool enableSIScheduler() const {
913  return EnableSIScheduler;
914  }
915 
916  bool loadStoreOptEnabled() const {
917  return EnableLoadStoreOpt;
918  }
919 
920  bool hasSGPRInitBug() const {
921  return SGPRInitBug;
922  }
923 
925 
928  }
929 
930  bool hasMFMAInlineLiteralBug() const {
932  }
933 
934  bool has12DWordStoreHazard() const {
936  }
937 
938  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
939  bool hasDwordx3LoadStores() const {
940  return CIInsts;
941  }
942 
945  }
946 
947  bool hasReadM0SendMsgHazard() const {
950  }
951 
952  bool hasReadM0LdsDmaHazard() const {
954  }
955 
958  }
959 
960  bool hasVcmpxPermlaneHazard() const {
961  return HasVcmpxPermlaneHazard;
962  }
963 
966  }
967 
970  }
971 
/// \returns true when the LDSMisalignedBug flag is set and CU mode is not
/// enabled; enabling CU mode masks the bug for the purposes of this query.
972  bool hasLDSMisalignedBug() const {
973  return LDSMisalignedBug && !EnableCuMode;
974  }
975 
976  bool hasInstFwdPrefetchBug() const {
977  return HasInstFwdPrefetchBug;
978  }
979 
980  bool hasVcmpxExecWARHazard() const {
981  return HasVcmpxExecWARHazard;
982  }
983 
986  }
987 
988  // Has one cycle hazard on transcendental instruction feeding a
989  // non transcendental VALU.
990  bool hasTransForwardingHazard() const { return GFX940Insts; }
991 
992  // Has one cycle hazard on a VALU instruction partially writing dst with
993  // a shift of result bits feeding another VALU instruction.
994  bool hasDstSelForwardingHazard() const { return GFX940Insts; }
995 
996  // Cannot use op_sel with v_dot instructions.
997  bool hasDOTOpSelHazard() const { return GFX940Insts; }
998 
999  // Does not have HW interlocks for VALU writing and then reading SGPRs.
1000  bool hasVDecCoExecHazard() const {
1001  return GFX940Insts;
1002  }
1003 
1004  bool hasNSAtoVMEMBug() const {
1005  return HasNSAtoVMEMBug;
1006  }
1007 
1008  bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1009 
1010  bool hasHardClauses() const { return getGeneration() >= GFX10; }
1011 
1012  bool hasGFX90AInsts() const { return GFX90AInsts; }
1013 
1014  bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1015 
1016  bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1017 
1019  return getGeneration() >= GFX11;
1020  }
1021 
1022  bool hasVALUTransUseHazard() const { return getGeneration() >= GFX11; }
1023 
1024  /// Return if operations acting on VGPR tuples require even alignment.
1025  bool needsAlignedVGPRs() const { return GFX90AInsts; }
1026 
1027  /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1028  bool hasSPackHL() const { return GFX11Insts; }
1029 
1030  /// Return true if the target's EXP instruction has the COMPR flag, which
1031  /// affects the meaning of the EN (enable) bits.
1032  bool hasCompressedExport() const { return !GFX11Insts; }
1033 
1034  /// Return true if the target's EXP instruction supports the NULL export
1035  /// target.
1036  bool hasNullExportTarget() const { return !GFX11Insts; }
1037 
1038  bool hasVOPDInsts() const { return HasVOPDInsts; }
1039 
1040  /// Return true if the target has the S_DELAY_ALU instruction.
1041  bool hasDelayAlu() const { return GFX11Insts; }
1042 
1043  bool hasPackedTID() const { return HasPackedTID; }
1044 
1045  // GFX940 is a derivative of GFX90A. hasGFX940Insts() being true implies that
1046  // hasGFX90AInsts() is also true.
1047  bool hasGFX940Insts() const { return GFX940Insts; }
1048 
1049  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1050  /// SGPRs
1051  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1052 
1053  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1054  /// VGPRs
1055  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1056 
1057  /// Return occupancy for the given function. Uses the LDS size and the
1058  /// numbers of registers, if provided.
1059  /// Note, occupancy can be affected by the scratch allocation as well, but
1060  /// we do not have enough information to compute it.
1061  unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1062  unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1063 
1064  /// \returns true if the flat_scratch register should be initialized with the
1065  /// pointer to the wave's scratch memory rather than a size and offset.
1066  bool flatScratchIsPointer() const {
1068  }
1069 
1070  /// \returns true if the flat_scratch register is initialized by the HW.
1071  /// In this case it is readonly.
1073 
1074  /// \returns true if the machine has merged shaders in which s0-s7 are
1075  /// reserved by the hardware and user SGPRs start at s8
1076  bool hasMergedShaders() const {
1077  return getGeneration() >= GFX9;
1078  }
1079 
1080  /// \returns true if the target supports the pre-NGG legacy geometry path.
1081  bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1082 
1083  /// \returns SGPR allocation granularity supported by the subtarget.
1084  unsigned getSGPRAllocGranule() const {
1086  }
1087 
1088  /// \returns SGPR encoding granularity supported by the subtarget.
1089  unsigned getSGPREncodingGranule() const {
1091  }
1092 
1093  /// \returns Total number of SGPRs supported by the subtarget.
1094  unsigned getTotalNumSGPRs() const {
1095  return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1096  }
1097 
1098  /// \returns Addressable number of SGPRs supported by the subtarget.
1099  unsigned getAddressableNumSGPRs() const {
1101  }
1102 
1103  /// \returns Minimum number of SGPRs that meets the given number of waves per
1104  /// execution unit requirement supported by the subtarget.
1105  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1106  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1107  }
1108 
1109  /// \returns Maximum number of SGPRs that meets the given number of waves per
1110  /// execution unit requirement supported by the subtarget.
1111  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1112  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1113  }
1114 
1115  /// \returns Reserved number of SGPRs. This is common
1116  /// utility function called by MachineFunction and
1117  /// Function variants of getReservedNumSGPRs.
1118  unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1119  /// \returns Reserved number of SGPRs for given machine function \p MF.
1120  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1121 
1122  /// \returns Reserved number of SGPRs for given function \p F.
1123  unsigned getReservedNumSGPRs(const Function &F) const;
1124 
1125  /// \returns max num SGPRs. This is the common utility
1126  /// function called by MachineFunction and Function
1127  /// variants of getMaxNumSGPRs.
1128  unsigned getBaseMaxNumSGPRs(const Function &F,
1129  std::pair<unsigned, unsigned> WavesPerEU,
1130  unsigned PreloadedSGPRs,
1131  unsigned ReservedNumSGPRs) const;
1132 
1133  /// \returns Maximum number of SGPRs that meets number of waves per execution
1134  /// unit requirement for function \p MF, or number of SGPRs explicitly
1135  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1136  ///
1137  /// \returns Value that meets number of waves per execution unit requirement
1138  /// if explicitly requested value cannot be converted to integer, violates
1139  /// subtarget's specifications, or does not meet number of waves per execution
1140  /// unit requirement.
1141  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1142 
1143  /// \returns Maximum number of SGPRs that meets number of waves per execution
1144  /// unit requirement for function \p F, or number of SGPRs explicitly
1145  /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1146  ///
1147  /// \returns Value that meets number of waves per execution unit requirement
1148  /// if explicitly requested value cannot be converted to integer, violates
1149  /// subtarget's specifications, or does not meet number of waves per execution
1150  /// unit requirement.
1151  unsigned getMaxNumSGPRs(const Function &F) const;
1152 
1153  /// \returns VGPR allocation granularity supported by the subtarget.
1154  unsigned getVGPRAllocGranule() const {
1156  }
1157 
1158  /// \returns VGPR encoding granularity supported by the subtarget.
1159  unsigned getVGPREncodingGranule() const {
1161  }
1162 
1163  /// \returns Total number of VGPRs supported by the subtarget.
1164  unsigned getTotalNumVGPRs() const {
1165  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1166  }
1167 
1168  /// \returns Addressable number of VGPRs supported by the subtarget.
1169  unsigned getAddressableNumVGPRs() const {
1171  }
1172 
1173  /// \returns Minimum number of VGPRs that meets given number of waves per
1174  /// execution unit requirement supported by the subtarget.
1175  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1176  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1177  }
1178 
1179  /// \returns Maximum number of VGPRs that meets given number of waves per
1180  /// execution unit requirement supported by the subtarget.
1181  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1182  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1183  }
1184 
1185  /// \returns max num VGPRs. This is the common utility function
1186  /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1187  unsigned getBaseMaxNumVGPRs(const Function &F,
1188  std::pair<unsigned, unsigned> WavesPerEU) const;
1189  /// \returns Maximum number of VGPRs that meets number of waves per execution
1190  /// unit requirement for function \p F, or number of VGPRs explicitly
1191  /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1192  ///
1193  /// \returns Value that meets number of waves per execution unit requirement
1194  /// if explicitly requested value cannot be converted to integer, violates
1195  /// subtarget's specifications, or does not meet number of waves per execution
1196  /// unit requirement.
1197  unsigned getMaxNumVGPRs(const Function &F) const;
1198 
/// \returns the AGPR limit for function \p F. This simply forwards to
/// getMaxNumVGPRs(F), so the two limits are always equal.
1199  unsigned getMaxNumAGPRs(const Function &F) const {
1200  return getMaxNumVGPRs(F);
1201  }
1202 
1203  /// \returns Maximum number of VGPRs that meets number of waves per execution
1204  /// unit requirement for function \p MF, or number of VGPRs explicitly
1205  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1206  ///
1207  /// \returns Value that meets number of waves per execution unit requirement
1208  /// if explicitly requested value cannot be converted to integer, violates
1209  /// subtarget's specifications, or does not meet number of waves per execution
1210  /// unit requirement.
1211  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1212 
1213  void getPostRAMutations(
1214  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1215  const override;
1216 
1217  std::unique_ptr<ScheduleDAGMutation>
1219 
/// \returns true if the wavefront size is exactly 32.
1220  bool isWave32() const {
1221  return getWavefrontSize() == 32;
1222  }
1223 
/// \returns true if the wavefront size is exactly 64.
1224  bool isWave64() const {
1225  return getWavefrontSize() == 64;
1226  }
1227 
1229  return getRegisterInfo()->getBoolRC();
1230  }
1231 
1232  /// \returns Maximum number of work groups per compute unit supported by the
1233  /// subtarget and limited by given \p FlatWorkGroupSize.
1234  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1235  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1236  }
1237 
1238  /// \returns Minimum flat work group size supported by the subtarget.
1239  unsigned getMinFlatWorkGroupSize() const override {
1241  }
1242 
1243  /// \returns Maximum flat work group size supported by the subtarget.
1244  unsigned getMaxFlatWorkGroupSize() const override {
1246  }
1247 
1248  /// \returns Number of waves per execution unit required to support the given
1249  /// \p FlatWorkGroupSize.
1250  unsigned
1251  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1252  return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1253  }
1254 
1255  /// \returns Minimum number of waves per execution unit supported by the
1256  /// subtarget.
1257  unsigned getMinWavesPerEU() const override {
1258  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1259  }
1260 
1261  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1262  SDep &Dep) const override;
1263 
1264  /// \returns true if it's beneficial on this subtarget for the scheduler to
1265  /// cluster stores as well as loads.
1266  bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1267 };
1268 
1269 } // end namespace llvm
1270 
1271 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
llvm::GCNSubtarget::shouldClusterStores
bool shouldClusterStores() const
Definition: GCNSubtarget.h:1266
llvm::GCNSubtarget::hasScalarMulHiInsts
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:387
llvm::GCNSubtarget::HasDot3Insts
bool HasDot3Insts
Definition: GCNSubtarget.h:140
llvm::GCNSubtarget::hasReadM0LdsDmaHazard
bool hasReadM0LdsDmaHazard() const
Definition: GCNSubtarget.h:952
llvm::GCNSubtarget::hasVDecCoExecHazard
bool hasVDecCoExecHazard() const
Definition: GCNSubtarget.h:1000
llvm::GCNSubtarget::HasImageInsts
bool HasImageInsts
Definition: GCNSubtarget.h:128
llvm::GCNSubtarget::Gen
unsigned Gen
Definition: GCNSubtarget.h:61
llvm::GCNSubtarget::hasGFX10A16
bool hasGFX10A16() const
Definition: GCNSubtarget.h:874
llvm::GCNSubtarget::hasBFM
bool hasBFM() const
Definition: GCNSubtarget.h:343
llvm::GCNSubtarget::hasPermLane64
bool hasPermLane64() const
Definition: GCNSubtarget.h:832
llvm::GCNSubtarget::hasDot2Insts
bool hasDot2Insts() const
Definition: GCNSubtarget.h:684
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::AMDGPU::IsaInfo::getSGPRAllocGranule
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:659
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::GCNSubtarget::GFX8Insts
bool GFX8Insts
Definition: GCNSubtarget.h:101
llvm::GCNSubtarget::hasGWSAutoReplay
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:626
llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:599
llvm::GCNSubtarget::HasSDWAScalar
bool HasSDWAScalar
Definition: GCNSubtarget.h:120
llvm::GCNSubtarget::TrapHandlerAbi
TrapHandlerAbi
Definition: GCNSubtarget.h:39
llvm::GCNSubtarget::HasGetWaveIdInst
bool HasGetWaveIdInst
Definition: GCNSubtarget.h:159
llvm::GCNSubtarget::getRegBankInfo
const RegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:246
llvm::GCNSubtarget::hasSDWAMac
bool hasSDWAMac() const
Definition: GCNSubtarget.h:668
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumSGPRs
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
Definition: AMDGPUMetadata.h:258
llvm::GCNSubtarget::hasDstSelForwardingHazard
bool hasDstSelForwardingHazard() const
Definition: GCNSubtarget.h:994
llvm::GCNSubtarget::hasVGPRIndexMode
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:808
llvm::InlineAsmLowering
Definition: InlineAsmLowering.h:28
llvm::GCNSubtarget::hasSDWASdst
bool hasSDWASdst() const
Definition: GCNSubtarget.h:664
llvm::GCNSubtarget::getFrameLowering
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:218
llvm::GCNSubtarget::initializeSubtargetDependencies
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Definition: AMDGPUSubtarget.cpp:60
llvm::GCNSubtarget::hasD16Images
bool hasD16Images() const
Definition: GCNSubtarget.h:611
llvm::GCNSubtarget::GFX11Insts
bool GFX11Insts
Definition: GCNSubtarget.h:106
llvm::GCNSubtarget::EnablePRTStrictNull
bool EnablePRTStrictNull
Definition: GCNSubtarget.h:93
llvm::Function
Definition: Function.h:60
llvm::GCNSubtarget::HasDot2Insts
bool HasDot2Insts
Definition: GCNSubtarget.h:139
llvm::GCNSubtarget::hasImageInsts
bool hasImageInsts() const
Definition: GCNSubtarget.h:862
llvm::GCNSubtarget::HasPackedFP32Ops
bool HasPackedFP32Ops
Definition: GCNSubtarget.h:127
llvm::GCNSubtarget::hasVALUPartialForwardingHazard
bool hasVALUPartialForwardingHazard() const
Definition: GCNSubtarget.h:1018
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isSramEccOnOrAny
bool isSramEccOnOrAny() const
Definition: AMDGPUBaseInfo.h:150
llvm::GCNSubtarget::FlatGlobalInsts
bool FlatGlobalInsts
Definition: GCNSubtarget.h:166
llvm::GCNSubtarget::FlatAddressSpace
bool FlatAddressSpace
Definition: GCNSubtarget.h:164
llvm::GCNSubtarget::HasDLInsts
bool HasDLInsts
Definition: GCNSubtarget.h:137
llvm::GCNSubtarget::hasNSAClauseBug
bool hasNSAClauseBug() const
Definition: GCNSubtarget.h:1008
llvm::GCNSubtarget::getNSAMaxSize
unsigned getNSAMaxSize() const
Definition: GCNSubtarget.h:892
llvm::GCNSubtarget::hasMovrel
bool hasMovrel() const
Definition: GCNSubtarget.h:804
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:37
llvm::GCNSubtarget::hasMovB64
bool hasMovB64() const
Definition: GCNSubtarget.h:908
llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:434
llvm::GCNSubtarget::TrapHandlerAbi::NONE
@ NONE
llvm::GCNSubtarget::hasPermLaneX16
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:829
llvm::GCNSubtarget::hasShaderCyclesRegister
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:746
llvm::GCNSubtarget::needsAlignedVGPRs
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
Definition: GCNSubtarget.h:1025
llvm::GCNSubtarget::hasFlatScratchInsts
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:561
llvm::GCNSubtarget::UnalignedDSAccess
bool UnalignedDSAccess
Definition: GCNSubtarget.h:176
llvm::GCNSubtarget::hasFP64
bool hasFP64() const
Definition: GCNSubtarget.h:293
llvm::GCNSubtarget::InstrItins
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:62
llvm::GCNSubtarget::HasImageStoreD16Bug
bool HasImageStoreD16Bug
Definition: GCNSubtarget.h:190
llvm::GCNSubtarget::hasAutoWaitcntBeforeBarrier
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:497
llvm::GCNSubtarget::supportsMinMaxDenormModes
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:456
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:595
llvm::GCNSubtarget::HasDot4Insts
bool HasDot4Insts
Definition: GCNSubtarget.h:141
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::GCNSubtarget::hasDS96AndDS128
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:476
llvm::GCNSubtarget::HasVcmpxPermlaneHazard
bool HasVcmpxPermlaneHazard
Definition: GCNSubtarget.h:180
llvm::GCNSubtarget::hasSPackHL
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
Definition: GCNSubtarget.h:1028
llvm::GCNSubtarget::getSetRegWaitStates
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:443
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::GCNSubtarget::hasVOPDInsts
bool hasVOPDInsts() const
Definition: GCNSubtarget.h:1038
llvm::GCNSubtarget::HasExtendedImageInsts
bool HasExtendedImageInsts
Definition: GCNSubtarget.h:129
llvm::GCNSubtarget::hasFlatGlobalInsts
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:557
llvm::GCNSubtarget::hasCARRY
bool hasCARRY() const
Definition: GCNSubtarget.h:371
llvm::GCNSubtarget::useDS128
bool useDS128() const
Definition: GCNSubtarget.h:471
llvm::GCNSubtarget::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1234
llvm::GCNSubtarget::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1244
llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:529
llvm::GCNSubtarget::hasDot3Insts
bool hasDot3Insts() const
Definition: GCNSubtarget.h:688
llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:789
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:676
llvm::GCNSubtarget::hasFractBug
bool hasFractBug() const
Definition: GCNSubtarget.h:331
llvm::GCNSubtarget::hasDwordx3LoadStores
bool hasDwordx3LoadStores() const
Definition: GCNSubtarget.h:939
llvm::GCNSubtarget::hasNSAEncoding
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:890
llvm::GCNSubtarget::TrapID::LLVMAMDHSADebugTrap
@ LLVMAMDHSADebugTrap
llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:255
llvm::GCNSubtarget::HasSDWAOutModsVOPC
bool HasSDWAOutModsVOPC
Definition: GCNSubtarget.h:123
llvm::AMDGPU::IsaInfo::getMinWavesPerEU
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:626
llvm::GCNSubtarget::getStackAlignment
Align getStackAlignment() const
Definition: GCNSubtarget.h:770
llvm::GCNSubtarget::hasUnalignedDSAccessEnabled
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:513
llvm::GCNSubtarget::EnableSIScheduler
bool EnableSIScheduler
Definition: GCNSubtarget.h:91
llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:422
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::GCNSubtarget::HasFlatSegmentOffsetBug
bool HasFlatSegmentOffsetBug
Definition: GCNSubtarget.h:189
llvm::GCNSubtarget::loadStoreOptEnabled
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:916
llvm::GCNSubtarget::TargetID
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:60
llvm::GCNSubtarget::HasDot8Insts
bool HasDot8Insts
Definition: GCNSubtarget.h:145
llvm::GCNSubtarget::UnalignedBufferAccess
bool UnalignedBufferAccess
Definition: GCNSubtarget.h:175
llvm::GCNSubtarget::hasGetWaveIdInst
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:738
llvm::GCNSubtarget::HasNSAtoVMEMBug
bool HasNSAtoVMEMBug
Definition: GCNSubtarget.h:186
llvm::GCNSubtarget::HasScalarAtomics
bool HasScalarAtomics
Definition: GCNSubtarget.h:118
llvm::GCNSubtarget::ScalarFlatScratchInsts
bool ScalarFlatScratchInsts
Definition: GCNSubtarget.h:168
llvm::AMDGPU::IsaInfo::getMaxNumVGPRs
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:814
llvm::GCNSubtarget::hasPackedFP32Ops
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:854
llvm::GCNSubtarget::HalfRate64Ops
bool HalfRate64Ops
Definition: GCNSubtarget.h:69
llvm::GCNSubtarget::NegativeScratchOffsetBug
bool NegativeScratchOffsetBug
Definition: GCNSubtarget.h:110
llvm::GCNSubtarget::HasNSAClauseBug
bool HasNSAClauseBug
Definition: GCNSubtarget.h:187
llvm::GCNSubtarget::hasScalarCompareEq64
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:814
llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:614
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:226
llvm::GCNSubtarget::hasCompressedExport
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
Definition: GCNSubtarget.h:1032
llvm::AMDGPUSubtarget::GFX11
@ GFX11
Definition: AMDGPUSubtarget.h:42
llvm::GCNSubtarget::hasLDSFPAtomicAdd
bool hasLDSFPAtomicAdd() const
Definition: GCNSubtarget.h:826
llvm::GCNSubtarget::HasOffset3fBug
bool HasOffset3fBug
Definition: GCNSubtarget.h:188
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GCNSubtarget::UnalignedAccessMode
bool UnalignedAccessMode
Definition: GCNSubtarget.h:76
llvm::GCNSubtarget::hasLdsDirect
bool hasLdsDirect() const
Definition: GCNSubtarget.h:1016
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:214
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:406
llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:505
llvm::GCNSubtarget::hasHalfRate64Ops
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:309
llvm::GCNSubtarget::hasGFX10_AEncoding
bool hasGFX10_AEncoding() const
Definition: GCNSubtarget.h:894
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:631
llvm::GCNSubtarget::getMaxNumAGPRs
unsigned getMaxNumAGPRs(const Function &F) const
Definition: GCNSubtarget.h:1199
llvm::GCNSubtarget::HasSDWASdst
bool HasSDWASdst
Definition: GCNSubtarget.h:121
llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:668
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:120
llvm::GCNSubtarget::HasNoDataDepHazard
bool HasNoDataDepHazard
Definition: GCNSubtarget.h:163
llvm::GCNSubtarget::HasVGPRIndexMode
bool HasVGPRIndexMode
Definition: GCNSubtarget.h:116
llvm::GCNSubtarget::overrideSchedPolicy
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
Definition: AMDGPUSubtarget.cpp:570
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:105
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:270
llvm::GCNSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
llvm::GCNSubtarget::hasFlatScrRegister
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:549
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1576
llvm::GCNSubtarget::hasFastFMAF32
bool hasFastFMAF32() const
Definition: GCNSubtarget.h:305
llvm::GCNSubtarget::hasA16
bool hasA16() const
Definition: GCNSubtarget.h:878
llvm::GCNSubtarget::GFX10_3Insts
bool GFX10_3Insts
Definition: GCNSubtarget.h:107
llvm::GCNSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:772
llvm::GCNSubtarget::hasBFI
bool hasBFI() const
Definition: GCNSubtarget.h:339
llvm::GCNSubtarget::GFX940Insts
bool GFX940Insts
Definition: GCNSubtarget.h:104
llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition: GCNSubtarget.h:1111
llvm::GCNSubtarget::useVGPRIndexMode
bool useVGPRIndexMode() const
Definition: AMDGPUSubtarget.cpp:591
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:317
llvm::GCNSubtarget::EnableLoadStoreOpt
bool EnableLoadStoreOpt
Definition: GCNSubtarget.h:89
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:465
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:367
llvm::GCNSubtarget::HasSMemRealTime
bool HasSMemRealTime
Definition: GCNSubtarget.h:112
llvm::Legalizer
Definition: Legalizer.h:36
llvm::GCNSubtarget::hasUnalignedAccessMode
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:521
llvm::GCNSubtarget::hasScalarFlatScratchInsts
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:574
llvm::GCNSubtarget::getTargetLowering
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:222
llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:128
llvm::GCNSubtarget::hasSDWAOmod
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:656
llvm::AMDGPU::IsaInfo::getMaxNumSGPRs
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Definition: AMDGPUBaseInfo.cpp:708
llvm::GCNSubtarget::hasSMRDReadVALUDefHazard
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:428
llvm::GCNSubtarget::getOccupancyWithNumSGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
Definition: AMDGPUSubtarget.cpp:597
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::GCNSubtarget::getInlineAsmLowering
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:234
llvm::GCNSubtarget::HasArchitectedFlatScratch
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:169
b
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int b
Definition: README.txt:418
llvm::GCNSubtarget::hasR128A16
bool hasR128A16() const
Definition: GCNSubtarget.h:870
llvm::GCNSubtarget::getBaseMaxNumSGPRs
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
Definition: AMDGPUSubtarget.cpp:675
llvm::GCNSubtarget::hasVscnt
bool hasVscnt() const
Definition: GCNSubtarget.h:734
AMDGPUSubtarget.h
llvm::GCNSubtarget::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1239
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isXnackOnOrAny
bool isXnackOnOrAny() const
Definition: AMDGPUBaseInfo.h:121
llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition: SelectionDAGTargetInfo.h:31
llvm::GCNSubtarget::getMaxPrivateElementSize
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:278
llvm::GCNSubtarget::hasGFX940Insts
bool hasGFX940Insts() const
Definition: GCNSubtarget.h:1047
llvm::GCNSubtarget::hasVOP3Literal
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:750
llvm::GCNSubtarget::SupportsXNACK
bool SupportsXNACK
Definition: GCNSubtarget.h:78
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:127
llvm::GCNSubtarget::hasFFBH
bool hasFFBH() const
Definition: GCNSubtarget.h:355
llvm::GCNSubtarget::hasSMemTimeInst
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:742
llvm::GCNSubtarget::hasReadVCCZBug
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:417
llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1220
llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:920
llvm::GCNSubtarget::AutoWaitcntBeforeBarrier
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:74
llvm::GCNSubtarget::EnableTgSplit
bool EnableTgSplit
Definition: GCNSubtarget.h:84
llvm::GCNSubtarget::hasSMemRealTime
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:800
llvm::GCNSubtarget::hasMed3_16
bool hasMed3_16() const
Definition: GCNSubtarget.h:359
llvm::GCNSubtarget::getInstructionSelector
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:238
llvm::GCNSubtarget::isMesaGfxShader
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:648
llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:645
llvm::GCNSubtarget::hasGFX10_BEncoding
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:898
llvm::GCNSubtarget::HasVOPDInsts
bool HasVOPDInsts
Definition: GCNSubtarget.h:192
llvm::GCNSubtarget::TrapHandler
bool TrapHandler
Definition: GCNSubtarget.h:86
llvm::GCNSubtarget::hasPkFmacF16Inst
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:716
llvm::GCNSubtarget::TrapID
TrapID
Definition: GCNSubtarget.h:44
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:537
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::SIFrameLowering
Definition: SIFrameLowering.h:16
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
SIISelLowering.h
llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1181
llvm::GCNSubtarget::hasDot6Insts
bool hasDot6Insts() const
Definition: GCNSubtarget.h:700
llvm::GCNSubtarget::LDSBankCount
int LDSBankCount
Definition: GCNSubtarget.h:63
llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:447
llvm::GCNSubtarget::hasScalarAtomics
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:822
llvm::GCNSubtarget::hasUnalignedDSAccess
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:509
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:783
llvm::GCNSubtarget::hasTransForwardingHazard
bool hasTransForwardingHazard() const
Definition: GCNSubtarget.h:990
llvm::GCNSubtarget::MIMG_R128
bool MIMG_R128
Definition: GCNSubtarget.h:99
llvm::GCNSubtarget::hasExtendedImageInsts
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:866
llvm::GCNSubtarget::hasDot1Insts
bool hasDot1Insts() const
Definition: GCNSubtarget.h:680
llvm::GCNSubtarget::hasOffset3fBug
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:882
llvm::GCNSubtarget::hasVcmpxExecWARHazard
bool hasVcmpxExecWARHazard() const
Definition: GCNSubtarget.h:980
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:200
llvm::GCNSubtarget::getVGPREncodingGranule
unsigned getVGPREncodingGranule() const
Definition: GCNSubtarget.h:1159
llvm::GCNSubtarget::isWave64
bool isWave64() const
Definition: GCNSubtarget.h:1224
llvm::GCNSubtarget::HasLdsBranchVmemWARHazard
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:185
llvm::GCNSubtarget::EnableUnsafeDSOffsetFolding
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:90
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::GCNSubtarget::MaxWaveScratchSize
static const unsigned MaxWaveScratchSize
Definition: GCNSubtarget.h:205
llvm::GCNSubtarget::FMA
bool FMA
Definition: GCNSubtarget.h:98
llvm::GCNSubtarget::FullRate64Ops
bool FullRate64Ops
Definition: GCNSubtarget.h:70
llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:242
llvm::GCNSubtarget::AddNoCarryInsts
bool AddNoCarryInsts
Definition: GCNSubtarget.h:171
llvm::GCNSubtarget::hasNegativeUnalignedScratchOffsetBug
bool hasNegativeUnalignedScratchOffsetBug() const
Definition: GCNSubtarget.h:926
llvm::GCNSubtarget::usePRTStrictNull
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:493
llvm::GCNSubtarget::hasAtomicPkFaddNoRtnInsts
bool hasAtomicPkFaddNoRtnInsts() const
Definition: GCNSubtarget.h:728
llvm::GCNSubtarget::hasInstFwdPrefetchBug
bool hasInstFwdPrefetchBug() const
Definition: GCNSubtarget.h:976
llvm::GCNSubtarget::FastDenormalF32
bool FastDenormalF32
Definition: GCNSubtarget.h:68
llvm::GCNSubtarget::HasPackedTID
bool HasPackedTID
Definition: GCNSubtarget.h:177
llvm::GCNSubtarget::GFX10_BEncoding
bool GFX10_BEncoding
Definition: GCNSubtarget.h:136
llvm::RegisterBankInfo
Holds all the information related to register banks.
Definition: RegisterBankInfo.h:39
llvm::InstructionSelector
Provides the logic to select generic machine instructions.
Definition: InstructionSelector.h:424
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
llvm::AMDGPU::IsaInfo::getMinNumSGPRs
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:691
llvm::GCNSubtarget::HasUnpackedD16VMem
bool HasUnpackedD16VMem
Definition: GCNSubtarget.h:172
llvm::GCNSubtarget::getSGPRAllocGranule
unsigned getSGPRAllocGranule() const
Definition: GCNSubtarget.h:1084
llvm::GCNSubtarget::hasImageGather4D16Bug
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:888
llvm::GCNSubtarget::hasPackedTID
bool hasPackedTID() const
Definition: GCNSubtarget.h:1043
llvm::GCNSubtarget::getTrapHandlerAbi
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:391
llvm::GCNSubtarget::HasFmaMixInsts
bool HasFmaMixInsts
Definition: GCNSubtarget.h:114
llvm::GCNSubtarget::HasAtomicPkFaddNoRtnInsts
bool HasAtomicPkFaddNoRtnInsts
Definition: GCNSubtarget.h:150
llvm::GCNSubtarget::has64BitDPP
bool has64BitDPP() const
Definition: GCNSubtarget.h:850
llvm::GCNSubtarget::GFX90AInsts
bool GFX90AInsts
Definition: GCNSubtarget.h:103
llvm::GCNSubtarget::HasIntClamp
bool HasIntClamp
Definition: GCNSubtarget.h:113
llvm::GCNSubtarget::hasNegativeScratchOffsetBug
bool hasNegativeScratchOffsetBug() const
Definition: GCNSubtarget.h:924
llvm::GCNSubtarget::hasVMEMtoScalarWriteHazard
bool hasVMEMtoScalarWriteHazard() const
Definition: GCNSubtarget.h:964
llvm::GCNSubtarget::NSAMaxSize
unsigned NSAMaxSize
Definition: GCNSubtarget.h:134
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:639
llvm::GCNSubtarget::hasVOP3DPP
bool hasVOP3DPP() const
Definition: GCNSubtarget.h:1014
llvm::GCNSubtarget::HasShaderCyclesRegister
bool HasShaderCyclesRegister
Definition: GCNSubtarget.h:161
llvm::GCNSubtarget::hasUnalignedBufferAccess
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:501
llvm::GCNSubtarget::ScalarizeGlobal
bool ScalarizeGlobal
Definition: GCNSubtarget.h:178
llvm::GCNSubtarget::LDSMisalignedBug
bool LDSMisalignedBug
Definition: GCNSubtarget.h:173
llvm::GCNSubtarget::getTotalNumVGPRs
unsigned getTotalNumVGPRs() const
Definition: GCNSubtarget.h:1164
llvm::GCNSubtarget::GCNSubtarget
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
Definition: AMDGPUSubtarget.cpp:159
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:487
llvm::GCNSubtarget::HasVMEMtoScalarWriteHazard
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:181
llvm::GCNSubtarget::hasLDSMisalignedBug
bool hasLDSMisalignedBug() const
Definition: GCNSubtarget.h:972
llvm::GCNSubtarget::hasMFMAInlineLiteralBug
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:930
llvm::GCNSubtarget::HasImageGather4D16Bug
bool HasImageGather4D16Bug
Definition: GCNSubtarget.h:191
llvm::GCNSubtarget::FeatureDisable
bool FeatureDisable
Definition: GCNSubtarget.h:195
llvm::GCNSubtarget::EnableXNACK
bool EnableXNACK
Definition: GCNSubtarget.h:82
llvm::GCNSubtarget::hasFlat
bool hasFlat() const
Definition: GCNSubtarget.h:321
llvm::GCNSubtarget::hasImageStoreD16Bug
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:886
llvm::GCNSubtarget::hasGlobalAddTidInsts
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:583
llvm::GCNSubtarget::hasSDWAScalar
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:660
llvm::GCNSubtarget::HasAtomicFaddNoRtnInsts
bool HasAtomicFaddNoRtnInsts
Definition: GCNSubtarget.h:149
llvm::SIRegisterInfo::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: SIRegisterInfo.h:320
llvm::GCNSubtarget::hasFlatScratchSVSMode
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:572
llvm::GCNSubtarget::hasFmaakFmamkF32Insts
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:858
llvm::GCNSubtarget::hasAtomicFaddNoRtnInsts
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:726
llvm::GCNSubtarget::SupportsSRAMECC
bool SupportsSRAMECC
Definition: GCNSubtarget.h:151
llvm::GCNSubtarget::EnableCuMode
bool EnableCuMode
Definition: GCNSubtarget.h:85
llvm::GCNSubtarget::HasDot6Insts
bool HasDot6Insts
Definition: GCNSubtarget.h:143
llvm::GCNSubtarget::hasNullExportTarget
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
Definition: GCNSubtarget.h:1036
llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1175
llvm::GCNSubtarget::hasReadM0MovRelInterpHazard
bool hasReadM0MovRelInterpHazard() const
Definition: GCNSubtarget.h:943
llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:1099
llvm::GCNSubtarget::hasUnalignedScratchAccess
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:517
llvm::GCNSubtarget::hasDot7Insts
bool hasDot7Insts() const
Definition: GCNSubtarget.h:704
llvm::GCNSubtarget::TSInfo
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:197
llvm::GCNSubtarget::hasRFEHazards
bool hasRFEHazards() const
Definition: GCNSubtarget.h:438
llvm::GCNSubtarget::HasVcmpxExecWARHazard
bool HasVcmpxExecWARHazard
Definition: GCNSubtarget.h:184
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:1012
llvm::GCNSubtarget::getOccupancyWithNumVGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
Definition: AMDGPUSubtarget.cpp:623
llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:672
llvm::GCNSubtarget::hasNoDataDepHazard
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:754
llvm::GCNSubtarget::enableEarlyIfConversion
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:789
llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:679
llvm::GCNSubtarget::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1251
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:274
llvm::GCNSubtarget::MaxPrivateElementSize
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:64
llvm::GCNSubtarget::hasBFE
bool hasBFE() const
Definition: GCNSubtarget.h:335
llvm::GCNSubtarget::hasDOTOpSelHazard
bool hasDOTOpSelHazard() const
Definition: GCNSubtarget.h:997
SIInstrInfo.h
llvm::GCNSubtarget::hasFMA
bool hasFMA() const
Definition: GCNSubtarget.h:375
llvm::GCNSubtarget::hasLdsBranchVmemWARHazard
bool hasLdsBranchVmemWARHazard() const
Definition: GCNSubtarget.h:984
llvm::GCNSubtarget::HasPkFmacF16Inst
bool HasPkFmacF16Inst
Definition: GCNSubtarget.h:147
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:73
llvm::GCNSubtarget::hasIntClamp
bool hasIntClamp() const
Definition: GCNSubtarget.h:289
llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:591
llvm::GCNSubtarget::hasDenormModeInst
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:461
llvm::GCNSubtarget::hasAtomicFaddInsts
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:720
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:553
llvm::GCNSubtarget::hasDot5Insts
bool hasDot5Insts() const
Definition: GCNSubtarget.h:696
llvm::GCNSubtarget::hasScalarStores
bool hasScalarStores() const
Definition: GCNSubtarget.h:818
llvm::GCNSubtarget::hasGFX10_3Insts
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:902
llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:777
llvm::GCNSubtarget::HasScalarStores
bool HasScalarStores
Definition: GCNSubtarget.h:117
llvm::GCNSubtarget::flatScratchIsPointer
bool flatScratchIsPointer() const
Definition: GCNSubtarget.h:1066
llvm::GCNSubtarget::supportsGetDoorbellID
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:395
llvm::GCNSubtarget::hasDPPBroadcasts
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:838
llvm::GCNSubtarget::hasMadF16
bool hasMadF16() const
Definition: AMDGPUSubtarget.cpp:587
SIFrameLowering.h
llvm::GCNSubtarget::GFX7GFX8GFX9Insts
bool GFX7GFX8GFX9Insts
Definition: GCNSubtarget.h:108
llvm::GCNSubtarget::HasSDWAOmod
bool HasSDWAOmod
Definition: GCNSubtarget.h:119
llvm::GCNSubtarget::GFX10_AEncoding
bool GFX10_AEncoding
Definition: GCNSubtarget.h:135
llvm::GCNSubtarget::hasVALUTransUseHazard
bool hasVALUTransUseHazard() const
Definition: GCNSubtarget.h:1022
llvm::GCNSubtarget::hasDPPWavefrontShifts
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:842
llvm::GCNSubtarget::NegativeUnalignedScratchOffsetBug
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:111
llvm::GCNSubtarget::SGPRInitBug
bool SGPRInitBug
Definition: GCNSubtarget.h:109
llvm::GCNSubtarget::HasVOP3Literal
bool HasVOP3Literal
Definition: GCNSubtarget.h:162
llvm::GCNSubtarget::hasDPP8
bool hasDPP8() const
Definition: GCNSubtarget.h:846
llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:649
llvm::GCNSubtarget::hasScalarPackInsts
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:383
llvm::GCNSubtarget::hasD16LoadStore
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:603
llvm::GCNSubtarget::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: GCNSubtarget.h:1228
llvm::GCNSubtarget::adjustSchedDependency
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
Definition: AMDGPUSubtarget.cpp:792
llvm::GCNSubtarget::hasDot4Insts
bool hasDot4Insts() const
Definition: GCNSubtarget.h:692
llvm::GCNSubtarget::hasNoSdstCMPX
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:730
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:60
llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1105
llvm::GCNSubtarget::getPostRAMutations
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation >> &Mutations) const override
Definition: AMDGPUSubtarget.cpp:961
llvm::GCNSubtarget::hasFlatScratchSTMode
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:568
llvm::GCNSubtarget::hasFlatAddressSpace
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:545
llvm::GCNSubtarget::FlatForGlobal
bool FlatForGlobal
Definition: GCNSubtarget.h:73
llvm::GCNSubtarget::HasNSAEncoding
bool HasNSAEncoding
Definition: GCNSubtarget.h:133
llvm::GCNSubtarget::getReservedNumSGPRs
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
Definition: AMDGPUSubtarget.cpp:649
llvm::GCNSubtarget::GFX10Insts
bool GFX10Insts
Definition: GCNSubtarget.h:105
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::GCNSubtarget::hasAtomicFaddRtnInsts
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:724
llvm::GCNSubtarget::getVGPRAllocGranule
unsigned getVGPRAllocGranule() const
Definition: GCNSubtarget.h:1154
llvm::GCNSubtarget::hasNSAtoVMEMBug
bool hasNSAtoVMEMBug() const
Definition: GCNSubtarget.h:1004
llvm::GCNSubtarget::HasMAIInsts
bool HasMAIInsts
Definition: GCNSubtarget.h:146
llvm::AMDGPUSubtarget::INVALID
@ INVALID
Definition: AMDGPUSubtarget.h:32
llvm::GCNSubtarget::enableSubRegLiveness
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:778
llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used without restricting the occupancy to lower than WaveCount.
AMDGPUGenSubtargetInfo
llvm::GCNSubtarget::setScalarizeGlobalBehavior
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:782
llvm::GCNSubtarget::GFX9Insts
bool GFX9Insts
Definition: GCNSubtarget.h:102
llvm::GCNSubtarget::HasDot1Insts
bool HasDot1Insts
Definition: GCNSubtarget.h:138
llvm::GCNSubtarget::HasDPP
bool HasDPP
Definition: GCNSubtarget.h:124
llvm::GCNSubtarget::hasFullRate64Ops
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:313
llvm::GCNSubtarget::HasNoSdstCMPX
bool HasNoSdstCMPX
Definition: GCNSubtarget.h:157
llvm::GCNSubtarget::hasDot8Insts
bool hasDot8Insts() const
Definition: GCNSubtarget.h:708
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::GCNSubtarget::hasUnpackedD16VMem
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:643
llvm::GCNSubtarget::flatScratchIsArchitected
bool flatScratchIsArchitected() const
Definition: GCNSubtarget.h:1072
llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:250
llvm::GCNSubtarget::EnableSRAMECC
bool EnableSRAMECC
Definition: GCNSubtarget.h:155
llvm::GCNSubtarget::getMinWavesPerEU
unsigned getMinWavesPerEU() const override
Definition: GCNSubtarget.h:1257
llvm::GCNSubtarget::HasDot5Insts
bool HasDot5Insts
Definition: GCNSubtarget.h:142
llvm::GCNSubtarget::useAA
bool useAA() const override
Definition: AMDGPUSubtarget.cpp:595
llvm::GCNSubtarget::hasDPP
bool hasDPP() const
Definition: GCNSubtarget.h:834
llvm::GCNSubtarget::HasInstFwdPrefetchBug
bool HasInstFwdPrefetchBug
Definition: GCNSubtarget.h:183
llvm::SITargetLowering
Definition: SIISelLowering.h:31
llvm::GCNSubtarget::HasSMEMtoVectorWriteHazard
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:182
llvm::GCNSubtarget::hasG16
bool hasG16() const
Definition: GCNSubtarget.h:880
llvm::GCNSubtarget::hasHardClauses
bool hasHardClauses() const
Definition: GCNSubtarget.h:1010
llvm::GCNSubtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:347
llvm::GCNSubtarget::haveRoundOpsF64
bool haveRoundOpsF64() const
True if the subtarget has v_trunc_f64, v_ceil_f64, and v_rndne_f64.
Definition: GCNSubtarget.h:481
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:617
llvm::SIInstrInfo::getRegisterInfo
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:178
llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:796
llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:797
llvm::GCNSubtarget::HasDPP8
bool HasDPP8
Definition: GCNSubtarget.h:125
llvm::GCNSubtarget::zeroesHigh16BitsOfDest
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns true if the result of this instruction with a 16-bit result returned in a 32-bit register implicitly zeroes the high 16 bits, rather than preserving the original value.
Definition: AMDGPUSubtarget.cpp:201
llvm::GCNSubtarget::EnableDS128
bool EnableDS128
Definition: GCNSubtarget.h:92
llvm::GCNSubtarget::HasMFMAInlineLiteralBug
bool HasMFMAInlineLiteralBug
Definition: GCNSubtarget.h:174
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the most significant bit to the least significant bit, stopping at the first 1.
Definition: MathExtras.h:225
llvm::GCNSubtarget::hasOnlyRevVALUShifts
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:327
llvm::GCNSubtarget::UnalignedScratchAccess
bool UnalignedScratchAccess
Definition: GCNSubtarget.h:75
llvm::GCNSubtarget::TrapID::LLVMAMDHSATrap
@ LLVMAMDHSATrap
llvm::GCNSubtarget::DumpCode
bool DumpCode
Definition: GCNSubtarget.h:94
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:265
llvm::GCNSubtarget::TrapHandlerAbi::AMDHSA
@ AMDHSA
llvm::GCNSubtarget::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs() const
Definition: GCNSubtarget.h:1169
llvm::SIInstrInfo
Definition: SIInstrInfo.h:43
llvm::GCNSubtarget::hasLegacyGeometry
bool hasLegacyGeometry() const
Definition: GCNSubtarget.h:1081
llvm::GCNSubtarget::enableFlatScratch
bool enableFlatScratch() const
Definition: GCNSubtarget.h:578
llvm::GCNSubtarget::hasSwap
bool hasSwap() const
Definition: GCNSubtarget.h:379
llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:533
llvm::GCNSubtarget::FlatScratchInsts
bool FlatScratchInsts
Definition: GCNSubtarget.h:167
AMDGPUCallLowering.h
llvm::GCNSubtarget::HasMovrel
bool HasMovrel
Definition: GCNSubtarget.h:115
llvm::GCNSubtarget::HasDot7Insts
bool HasDot7Insts
Definition: GCNSubtarget.h:144
SelectionDAGTargetInfo.h
llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:541
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:180
llvm::GCNSubtarget::hasMergedShaders
bool hasMergedShaders() const
Definition: GCNSubtarget.h:1076
llvm::GCNSubtarget::FP64
bool FP64
Definition: GCNSubtarget.h:97
llvm::GCNSubtarget::getBaseReservedNumSGPRs
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
Definition: AMDGPUSubtarget.cpp:633
llvm::GCNSubtarget::hasFFBL
bool hasFFBL() const
Definition: GCNSubtarget.h:351
llvm::GCNSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:255
llvm::GCNSubtarget::hasApertureRegs
bool hasApertureRegs() const
Definition: GCNSubtarget.h:525
llvm::GCNSubtarget::~GCNSubtarget
~GCNSubtarget() override
llvm::GCNSubtarget::getSGPREncodingGranule
unsigned getSGPREncodingGranule() const
Definition: GCNSubtarget.h:1089
llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:639
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:607
llvm::GCNSubtarget::enableSIScheduler
bool enableSIScheduler() const
Definition: GCNSubtarget.h:912
llvm::AMDGPU::IsaInfo::getMinNumVGPRs
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:803
llvm::GCNSubtarget::getInstrItineraryData
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:259
llvm::GCNSubtarget::CIInsts
bool CIInsts
Definition: GCNSubtarget.h:100
llvm::GCNSubtarget::HasVscnt
bool HasVscnt
Definition: GCNSubtarget.h:158
llvm::GCNSubtarget::FastFMAF32
bool FastFMAF32
Definition: GCNSubtarget.h:67
llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:758
llvm::GCNSubtarget::hasDelayAlu
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
Definition: GCNSubtarget.h:1041
llvm::GCNSubtarget::hasReadM0SendMsgHazard
bool hasReadM0SendMsgHazard() const
Definition: GCNSubtarget.h:947
llvm::GCNSubtarget::Has64BitDPP
bool Has64BitDPP
Definition: GCNSubtarget.h:126
llvm::GCNSubtarget::hasMIMG_R128
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:297
llvm::LegalizerInfo
Definition: LegalizerInfo.h:1180
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumVGPRs
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
Definition: AMDGPUMetadata.h:260
llvm::GCNSubtarget::EnableFlatScratch
bool EnableFlatScratch
Definition: GCNSubtarget.h:170
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::GCNSubtarget::hasSMEMtoVectorWriteHazard
bool hasSMEMtoVectorWriteHazard() const
Definition: GCNSubtarget.h:968
llvm::GCNSubtarget::HasApertureRegs
bool HasApertureRegs
Definition: GCNSubtarget.h:77
llvm::GCNSubtarget::hasMin3Max3_16
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:363
llvm::GCNSubtarget::HasSMemTimeInst
bool HasSMemTimeInst
Definition: GCNSubtarget.h:160
llvm::AMDGPUSubtarget::Generation
Generation
Definition: AMDGPUSubtarget.h:31
llvm::GCNSubtarget::createFillMFMAShadowMutation
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
Definition: AMDGPUSubtarget.cpp:967
llvm::GCNSubtarget::HasAtomicFaddRtnInsts
bool HasAtomicFaddRtnInsts
Definition: GCNSubtarget.h:148
llvm::MachineSchedPolicy
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Definition: MachineScheduler.h:179
llvm::GCNSubtarget::TargetTriple
Triple TargetTriple
Definition: GCNSubtarget.h:59
llvm::GCNSubtarget::HasSDWAMac
bool HasSDWAMac
Definition: GCNSubtarget.h:122
llvm::GCNSubtarget::hasHWFP64
bool hasHWFP64() const
Definition: GCNSubtarget.h:301
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::GCNSubtarget::getTotalNumSGPRs
unsigned getTotalNumSGPRs() const
Definition: GCNSubtarget.h:1094
llvm::GCNSubtarget::has12DWordStoreHazard
bool has12DWordStoreHazard() const
Definition: GCNSubtarget.h:934
llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition: AMDGPUSubtarget.h:204
llvm::GCNSubtarget::hasVcmpxPermlaneHazard
bool hasVcmpxPermlaneHazard() const
Definition: GCNSubtarget.h:960
llvm::GCNSubtarget::hasSDWAOutModsVOPC
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:672
llvm::CallLowering
Definition: CallLowering.h:44
llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:662
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:402
llvm::GCNSubtarget::hasMad64_32
bool hasMad64_32() const
Definition: GCNSubtarget.h:652
llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:762
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::GCNSubtarget::getBaseMaxNumVGPRs
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
Definition: AMDGPUSubtarget.cpp:752
llvm::GCNSubtarget::FlatInstOffsets
bool FlatInstOffsets
Definition: GCNSubtarget.h:165
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:712
llvm::GCNSubtarget::HasG16
bool HasG16
Definition: GCNSubtarget.h:132
llvm::GCNSubtarget::hasUsableDivScaleConditionOutput
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:411
llvm::GCNSubtarget::hasLshlAddB64
bool hasLshlAddB64() const
Definition: GCNSubtarget.h:910
llvm::GCNSubtarget::hasAtomicCSub
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:587
llvm::GCNSubtarget::HasGFX10A16
bool HasGFX10A16
Definition: GCNSubtarget.h:131
llvm::GCNSubtarget::HasR128A16
bool HasR128A16
Definition: GCNSubtarget.h:130
llvm::GCNSubtarget::getCallLowering
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:230
llvm::GCNSubtarget::hasReadM0LdsDirectHazard
bool hasReadM0LdsDirectHazard() const
Definition: GCNSubtarget.h:956