LLVM 23.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI selector. The enumerator values are spelled out
/// explicitly; see the trap-handler-abi section of AMDGPUUsage.
enum class TrapHandlerAbi {
  NONE = 0x00,
  AMDHSA = 0x01,
};
44
45 enum class TrapID {
48 };
49
50private:
51 /// SelectionDAGISel related APIs.
52 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
53
54 /// GlobalISel related APIs.
55 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
56 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
57 std::unique_ptr<InstructionSelector> InstSelector;
58 std::unique_ptr<LegalizerInfo> Legalizer;
59 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
60
61protected:
62 // Basic subtarget description.
64 unsigned Gen = INVALID;
66 int LDSBankCount = 0;
68
69 // Possibly statically set by tablegen, but may want to be overridden.
70 bool FastDenormalF32 = false;
71 bool HalfRate64Ops = false;
72 bool FullRate64Ops = false;
73
74 // Dynamically set bits that enable features.
75 bool FlatForGlobal = false;
77 bool BackOffBarrier = false;
79 bool UnalignedAccessMode = false;
81 bool HasApertureRegs = false;
82 bool SupportsXNACK = false;
83 bool KernargPreload = false;
84
85 // This should not be used directly. 'TargetID' tracks the dynamic settings
86 // for XNACK.
87 bool EnableXNACK = false;
88
89 bool EnableTgSplit = false;
90 bool EnableCuMode = false;
91 bool TrapHandler = false;
92 bool EnablePreciseMemory = false;
93
94 // Used as options.
95 bool EnableLoadStoreOpt = false;
97 bool EnableSIScheduler = false;
98 bool EnableDS128 = false;
99 bool EnablePRTStrictNull = false;
100 bool DumpCode = false;
102
103 // Subtarget properties statically set by tablegen.
104 bool FP64 = false;
105 bool FMA = false;
106 bool MIMG_R128 = false;
107 bool CIInsts = false;
108 bool GFX8Insts = false;
109 bool GFX9Insts = false;
110 bool GFX90AInsts = false;
111 bool GFX940Insts = false;
112 bool GFX950Insts = false;
113 bool GFX10Insts = false;
114 bool GFX11Insts = false;
115 bool GFX12Insts = false;
116 bool GFX1250Insts = false;
117 bool GFX10_3Insts = false;
118 bool GFX7GFX8GFX9Insts = false;
119 bool SGPRInitBug = false;
120 bool UserSGPRInit16Bug = false;
123 bool HasSMemRealTime = false;
124 bool HasIntClamp = false;
125 bool HasFmaMixInsts = false;
126 bool HasFmaMixBF16Insts = false;
127 bool HasMovrel = false;
128 bool HasVGPRIndexMode = false;
130 bool HasScalarStores = false;
131 bool HasScalarAtomics = false;
132 bool HasSDWAOmod = false;
133 bool HasSDWAScalar = false;
134 bool HasSDWASdst = false;
135 bool HasSDWAMac = false;
136 bool HasSDWAOutModsVOPC = false;
137 bool HasDPP = false;
138 bool HasDPP8 = false;
139 bool HasDPALU_DPP = false;
140 bool HasDPPSrc1SGPR = false;
141 bool HasPackedFP32Ops = false;
142 bool HasImageInsts = false;
144 bool HasR128A16 = false;
145 bool HasA16 = false;
146 bool HasG16 = false;
147 bool HasNSAEncoding = false;
149 bool GFX10_AEncoding = false;
150 bool GFX10_BEncoding = false;
151 bool HasDLInsts = false;
152 bool HasFmacF64Inst = false;
153 bool HasDot1Insts = false;
154 bool HasDot2Insts = false;
155 bool HasDot3Insts = false;
156 bool HasDot4Insts = false;
157 bool HasDot5Insts = false;
158 bool HasDot6Insts = false;
159 bool HasDot7Insts = false;
160 bool HasDot8Insts = false;
161 bool HasDot9Insts = false;
162 bool HasDot10Insts = false;
163 bool HasDot11Insts = false;
164 bool HasDot12Insts = false;
165 bool HasDot13Insts = false;
166 bool HasMAIInsts = false;
167 bool HasFP8Insts = false;
169 bool HasMcastLoadInsts = false;
170 bool HasCubeInsts = false;
171 bool HasLerpInst = false;
172 bool HasSadInsts = false;
173 bool HasQsadInsts = false;
174 bool HasCvtNormInsts = false;
177 bool HasFP8E5M3Insts = false;
178 bool HasCvtFP8Vop1Bug = false;
179 bool HasPkFmacF16Inst = false;
200 bool HasXF32Insts = false;
201 /// The maximum number of instructions that may be placed within an S_CLAUSE,
202 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
203 /// indicates a lack of S_CLAUSE support.
205 bool SupportsSRAMECC = false;
206 bool DynamicVGPR = false;
208 bool HasVMemToLDSLoad = false;
209 bool RequiresAlignVGPR = false;
210
211 // This should not be used directly. 'TargetID' tracks the dynamic settings
212 // for SRAMECC.
213 bool EnableSRAMECC = false;
214
215 bool HasNoSdstCMPX = false;
216 bool HasVscnt = false;
217 bool HasWaitXcnt = false;
218 bool HasGetWaveIdInst = false;
219 bool HasSMemTimeInst = false;
222 bool HasVOP3Literal = false;
223 bool HasNoDataDepHazard = false;
224 bool FlatAddressSpace = false;
225 bool FlatInstOffsets = false;
226 bool FlatGlobalInsts = false;
227 bool FlatScratchInsts = false;
228 bool FlatGVSMode = false;
231 bool EnableFlatScratch = false;
233 bool HasGDS = false;
234 bool HasGWS = false;
235 bool AddNoCarryInsts = false;
236 bool HasUnpackedD16VMem = false;
237 bool LDSMisalignedBug = false;
240 bool UnalignedDSAccess = false;
241 bool HasPackedTID = false;
242 bool ScalarizeGlobal = false;
243 bool HasSALUFloatInsts = false;
246 bool Has64BitLiterals = false;
249 bool HasBitOp3Insts = false;
250 bool HasTanhInsts = false;
253 bool HasPrngInst = false;
255 bool HasPermlane16Swap = false;
256 bool HasPermlane32Swap = false;
261 bool HasVmemPrefInsts = false;
263 bool HasSafeCUPrefetch = false;
266 bool HasNSAtoVMEMBug = false;
267 bool HasNSAClauseBug = false;
268 bool HasOffset3fBug = false;
274 bool Has1_5xVGPRs = false;
275 bool HasMADIntraFwdBug = false;
276 bool HasVOPDInsts = false;
280 bool HasAshrPkInsts = false;
284 bool HasMin3Max3PKF16 = false;
286 bool HasLshlAddU64Inst = false;
287 bool HasAddSubU64Insts = false;
288 bool HasMadU32Inst = false;
289 bool HasAddMinMaxInsts = false;
294 bool HasSWakeupBarrier = false;
295
296 bool RequiresCOV6 = false;
299
301
302 bool HasClusters = false;
304
305 // Dummy feature to use for assembler in tablegen.
306 bool FeatureDisable = false;
307
308private:
309 SIInstrInfo InstrInfo;
310 SITargetLowering TLInfo;
311 SIFrameLowering FrameLowering;
312
313public:
314 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
315 const GCNTargetMachine &TM);
316 ~GCNSubtarget() override;
317
319 StringRef GPU, StringRef FS);
320
321 /// Diagnose inconsistent subtarget features before attempting to codegen
322 /// function \p F.
323 void checkSubtargetFeatures(const Function &F) const;
324
325 const SIInstrInfo *getInstrInfo() const override {
326 return &InstrInfo;
327 }
328
329 const SIFrameLowering *getFrameLowering() const override {
330 return &FrameLowering;
331 }
332
333 const SITargetLowering *getTargetLowering() const override {
334 return &TLInfo;
335 }
336
337 const SIRegisterInfo *getRegisterInfo() const override {
338 return &InstrInfo.getRegisterInfo();
339 }
340
341 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
342
343 const CallLowering *getCallLowering() const override {
344 return CallLoweringInfo.get();
345 }
346
347 const InlineAsmLowering *getInlineAsmLowering() const override {
348 return InlineAsmLoweringInfo.get();
349 }
350
352 return InstSelector.get();
353 }
354
355 const LegalizerInfo *getLegalizerInfo() const override {
356 return Legalizer.get();
357 }
358
359 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
360 return RegBankInfo.get();
361 }
362
364 return TargetID;
365 }
366
368 return &InstrItins;
369 }
370
372
374 return (Generation)Gen;
375 }
376
377 unsigned getMaxWaveScratchSize() const {
378 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
379 if (getGeneration() >= GFX12) {
380 // 18-bit field in units of 64-dword.
381 return (64 * 4) * ((1 << 18) - 1);
382 }
383 if (getGeneration() == GFX11) {
384 // 15-bit field in units of 64-dword.
385 return (64 * 4) * ((1 << 15) - 1);
386 }
387 // 13-bit field in units of 256-dword.
388 return (256 * 4) * ((1 << 13) - 1);
389 }
390
391 /// Return the number of high bits known to be zero for a frame index.
395
396 int getLDSBankCount() const {
397 return LDSBankCount;
398 }
399
400 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
401 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
402 }
403
404 unsigned getConstantBusLimit(unsigned Opcode) const;
405
406 /// Returns if the result of this instruction with a 16-bit result returned in
407 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
408 /// the original value.
409 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
410
411 bool supportsWGP() const {
412 if (GFX1250Insts)
413 return false;
414 return getGeneration() >= GFX10;
415 }
416
417 bool hasIntClamp() const {
418 return HasIntClamp;
419 }
420
421 bool hasFP64() const {
422 return FP64;
423 }
424
425 bool hasMIMG_R128() const {
426 return MIMG_R128;
427 }
428
429 bool hasHWFP64() const {
430 return FP64;
431 }
432
433 bool hasHalfRate64Ops() const {
434 return HalfRate64Ops;
435 }
436
437 bool hasFullRate64Ops() const {
438 return FullRate64Ops;
439 }
440
441 bool hasAddr64() const {
443 }
444
445 bool hasFlat() const {
447 }
448
449 // Return true if the target only has the reverse operand versions of VALU
450 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
451 bool hasOnlyRevVALUShifts() const {
453 }
454
455 bool hasFractBug() const {
457 }
458
459 bool hasMed3_16() const {
461 }
462
463 bool hasMin3Max3_16() const {
465 }
466
467 bool hasFmaMixInsts() const {
468 return HasFmaMixInsts;
469 }
470
471 bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; }
472
473 bool hasFMA() const {
474 return FMA;
475 }
476
477 bool hasSwap() const {
478 return GFX9Insts;
479 }
480
481 bool hasScalarPackInsts() const {
482 return GFX9Insts;
483 }
484
485 bool hasScalarMulHiInsts() const {
486 return GFX9Insts;
487 }
488
489 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
490
494
496 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
497 return getGeneration() >= GFX9;
498 }
499
500 /// True if the offset field of DS instructions works as expected. On SI, the
501 /// offset uses a 16-bit adder and does not always wrap properly.
502 bool hasUsableDSOffset() const {
503 return getGeneration() >= SEA_ISLANDS;
504 }
505
509
510 /// Condition output from div_scale is usable.
514
515 /// Extra wait hazard is needed in some cases before
516 /// s_cbranch_vccnz/s_cbranch_vccz.
517 bool hasReadVCCZBug() const {
518 return getGeneration() <= SEA_ISLANDS;
519 }
520
521 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
523 return getGeneration() >= GFX10;
524 }
525
526 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
527 /// was written by a VALU instruction.
530 }
531
532 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
533 /// SGPR was written by a VALU Instruction.
536 }
537
538 bool hasRFEHazards() const {
540 }
541
542 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
543 unsigned getSetRegWaitStates() const {
544 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
545 }
546
547 bool dumpCode() const {
548 return DumpCode;
549 }
550
551 /// Return the amount of LDS that can be used that will not restrict the
552 /// occupancy lower than WaveCount.
553 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
554 const Function &) const;
555
558 }
559
560 /// \returns If target supports S_DENORM_MODE.
561 bool hasDenormModeInst() const {
563 }
564
565 bool useFlatForGlobal() const {
566 return FlatForGlobal;
567 }
568
569 /// \returns If target supports ds_read/write_b128 and user enables generation
570 /// of ds_read/write_b128.
571 bool useDS128() const {
572 return CIInsts && EnableDS128;
573 }
574
575 /// \return If target supports ds_read/write_b96/128.
576 bool hasDS96AndDS128() const {
577 return CIInsts;
578 }
579
580 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
581 bool haveRoundOpsF64() const {
582 return CIInsts;
583 }
584
585 /// \returns If MUBUF instructions always perform range checking, even for
586 /// buffer resources used for private memory access.
590
591 /// \returns If target requires PRT Struct NULL support (zero result registers
592 /// for sparse texture support).
593 bool usePRTStrictNull() const {
594 return EnablePRTStrictNull;
595 }
596
600
601 /// \returns true if the target supports backing off of s_barrier instructions
602 /// when an exception is raised.
604 return BackOffBarrier;
605 }
606
609 }
610
614
615 bool hasUnalignedDSAccess() const {
616 return UnalignedDSAccess;
617 }
618
622
625 }
626
630
632 return UnalignedAccessMode;
633 }
634
636
637 bool hasApertureRegs() const {
638 return HasApertureRegs;
639 }
640
641 bool isTrapHandlerEnabled() const {
642 return TrapHandler;
643 }
644
645 bool isXNACKEnabled() const {
646 return TargetID.isXnackOnOrAny();
647 }
648
649 bool isTgSplitEnabled() const {
650 return EnableTgSplit;
651 }
652
653 bool isCuModeEnabled() const {
654 return EnableCuMode;
655 }
656
658
659 bool hasFlatAddressSpace() const {
660 return FlatAddressSpace;
661 }
662
663 bool hasFlatScrRegister() const {
664 return hasFlatAddressSpace();
665 }
666
667 bool hasFlatInstOffsets() const {
668 return FlatInstOffsets;
669 }
670
671 bool hasFlatGlobalInsts() const {
672 return FlatGlobalInsts;
673 }
674
675 bool hasFlatScratchInsts() const {
676 return FlatScratchInsts;
677 }
678
679 // Check if target supports ST addressing mode with FLAT scratch instructions.
680 // The ST addressing mode means no registers are used, either VGPR or SGPR,
681 // but only immediate offset is swizzled and added to the FLAT scratch base.
682 bool hasFlatScratchSTMode() const {
684 }
685
686 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
687
690 }
691
692 bool enableFlatScratch() const {
693 return flatScratchIsArchitected() ||
695 }
696
697 bool hasGlobalAddTidInsts() const {
698 return GFX10_BEncoding;
699 }
700
701 bool hasAtomicCSub() const {
702 return GFX10_BEncoding;
703 }
704
705 bool hasMTBUFInsts() const { return !hasGFX1250Insts(); }
706
707 bool hasFormattedMUBUFInsts() const { return !hasGFX1250Insts(); }
708
709 bool hasExportInsts() const {
710 return !hasGFX940Insts() && !hasGFX1250Insts();
711 }
712
713 bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
714
715 // DS_ADD_F64/DS_ADD_RTN_F64
716 bool hasLdsAtomicAddF64() const {
717 return hasGFX90AInsts() || hasGFX1250Insts();
718 }
719
721 return getGeneration() >= GFX9;
722 }
723
726 }
727
729 return getGeneration() > GFX9;
730 }
731
732 bool hasD16LoadStore() const {
733 return getGeneration() >= GFX9;
734 }
735
737 return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
738 }
739
740 bool hasD16Images() const {
742 }
743
744 /// Return if most LDS instructions have an m0 use that require m0 to be
745 /// initialized.
746 bool ldsRequiresM0Init() const {
747 return getGeneration() < GFX9;
748 }
749
750 // True if the hardware rewinds and replays GWS operations if a wave is
751 // preempted.
752 //
753 // If this is false, a GWS operation requires testing if a nack set the
754 // MEM_VIOL bit, and repeating if so.
755 bool hasGWSAutoReplay() const {
756 return getGeneration() >= GFX9;
757 }
758
759 /// \returns if target has ds_gws_sema_release_all instruction.
760 bool hasGWSSemaReleaseAll() const {
761 return CIInsts;
762 }
763
764 /// \returns true if the target has integer add/sub instructions that do not
765 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
766 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
767 /// for saturation.
768 bool hasAddNoCarry() const {
769 return AddNoCarryInsts;
770 }
771
772 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
773
774 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
775
776 bool hasUnpackedD16VMem() const {
777 return HasUnpackedD16VMem;
778 }
779
780 // Covers VS/PS/CS graphics shaders
781 bool isMesaGfxShader(const Function &F) const {
782 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
783 }
784
785 bool hasMad64_32() const {
786 return getGeneration() >= SEA_ISLANDS;
787 }
788
789 bool hasSDWAOmod() const {
790 return HasSDWAOmod;
791 }
792
793 bool hasSDWAScalar() const {
794 return HasSDWAScalar;
795 }
796
797 bool hasSDWASdst() const {
798 return HasSDWASdst;
799 }
800
801 bool hasSDWAMac() const {
802 return HasSDWAMac;
803 }
804
805 bool hasSDWAOutModsVOPC() const {
806 return HasSDWAOutModsVOPC;
807 }
808
809 bool hasDLInsts() const {
810 return HasDLInsts;
811 }
812
813 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
814
815 bool hasDot1Insts() const {
816 return HasDot1Insts;
817 }
818
819 bool hasDot2Insts() const {
820 return HasDot2Insts;
821 }
822
823 bool hasDot3Insts() const {
824 return HasDot3Insts;
825 }
826
827 bool hasDot4Insts() const {
828 return HasDot4Insts;
829 }
830
831 bool hasDot5Insts() const {
832 return HasDot5Insts;
833 }
834
835 bool hasDot6Insts() const {
836 return HasDot6Insts;
837 }
838
839 bool hasDot7Insts() const {
840 return HasDot7Insts;
841 }
842
843 bool hasDot8Insts() const {
844 return HasDot8Insts;
845 }
846
847 bool hasDot9Insts() const {
848 return HasDot9Insts;
849 }
850
851 bool hasDot10Insts() const {
852 return HasDot10Insts;
853 }
854
855 bool hasDot11Insts() const {
856 return HasDot11Insts;
857 }
858
859 bool hasDot12Insts() const {
860 return HasDot12Insts;
861 }
862
863 bool hasDot13Insts() const {
864 return HasDot13Insts;
865 }
866
867 bool hasMAIInsts() const {
868 return HasMAIInsts;
869 }
870
871 bool hasFP8Insts() const {
872 return HasFP8Insts;
873 }
874
876
877 bool hasMcastLoadInsts() const { return HasMcastLoadInsts; }
878
879 bool hasCubeInsts() const { return HasCubeInsts; }
880
881 bool hasLerpInst() const { return HasLerpInst; }
882
883 bool hasSadInsts() const { return HasSadInsts; }
884
885 bool hasQsadInsts() const { return HasQsadInsts; }
886
887 bool hasCvtNormInsts() const { return HasCvtNormInsts; }
888
890
892
893 bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
894
895 bool hasPkFmacF16Inst() const {
896 return HasPkFmacF16Inst;
897 }
898
902
906
910
914
916
918
922
924
926
930
934
938
942
944
945 /// \return true if the target has flat, global, and buffer atomic fadd for
946 /// double.
950
951 /// \return true if the target's flat, global, and buffer atomic fadd for
952 /// float supports denormal handling.
956
957 /// \return true if atomic operations targeting fine-grained memory work
958 /// correctly at device scope, in allocations in host or peer PCIe device
959 /// memory.
963
964 /// \return true if HW emulates system scope atomics unsupported by the PCI-e
965 /// via CAS loop.
969
971
975
976 bool hasNoSdstCMPX() const {
977 return HasNoSdstCMPX;
978 }
979
980 bool hasVscnt() const {
981 return HasVscnt;
982 }
983
984 bool hasGetWaveIdInst() const {
985 return HasGetWaveIdInst;
986 }
987
988 bool hasSMemTimeInst() const {
989 return HasSMemTimeInst;
990 }
991
994 }
995
999
1000 bool hasVOP3Literal() const {
1001 return HasVOP3Literal;
1002 }
1003
1004 bool hasNoDataDepHazard() const {
1005 return HasNoDataDepHazard;
1006 }
1007
1009 return getGeneration() < SEA_ISLANDS;
1010 }
1011
1012 bool hasInstPrefetch() const {
1013 return getGeneration() == GFX10 || getGeneration() == GFX11;
1014 }
1015
1016 bool hasPrefetch() const { return GFX12Insts; }
1017
1018 bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }
1019
1021
1022 bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
1023
1024 // Has s_cmpk_* instructions.
1025 bool hasSCmpK() const { return getGeneration() < GFX12; }
1026
1027 // Scratch is allocated in 256 dword per wave blocks for the entire
1028 // wavefront. When viewed from the perspective of an arbitrary workitem, this
1029 // is 4-byte aligned.
1030 //
1031 // Only 4-byte alignment is really needed to access anything. Transformations
1032 // on the pointer value itself may rely on the alignment / known low bits of
1033 // the pointer. Set this to something above the minimum to avoid needing
1034 // dynamic realignment in common cases.
1035 Align getStackAlignment() const { return Align(16); }
1036
1037 bool enableMachineScheduler() const override {
1038 return true;
1039 }
1040
1041 bool useAA() const override;
1042
1043 bool enableSubRegLiveness() const override {
1044 return true;
1045 }
1046
1049
1050 // static wrappers
1051 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
1052
1053 // XXX - Why is this here if it isn't in the default pass set?
1054 bool enableEarlyIfConversion() const override {
1055 return true;
1056 }
1057
1059 const SchedRegion &Region) const override;
1060
1062 const SchedRegion &Region) const override;
1063
1064 void mirFileLoaded(MachineFunction &MF) const override;
1065
1066 unsigned getMaxNumUserSGPRs() const {
1067 return AMDGPU::getMaxNumUserSGPRs(*this);
1068 }
1069
1070 bool hasSMemRealTime() const {
1071 return HasSMemRealTime;
1072 }
1073
1074 bool hasMovrel() const {
1075 return HasMovrel;
1076 }
1077
1078 bool hasVGPRIndexMode() const {
1079 return HasVGPRIndexMode;
1080 }
1081
1082 bool useVGPRIndexMode() const;
1083
1085 return getGeneration() >= VOLCANIC_ISLANDS;
1086 }
1087
1089
1090 bool hasScalarStores() const {
1091 return HasScalarStores;
1092 }
1093
1094 bool hasScalarAtomics() const {
1095 return HasScalarAtomics;
1096 }
1097
1098 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1100
1101 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1102 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1103
1104 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1105 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1106
1107 bool hasDPP() const {
1108 return HasDPP;
1109 }
1110
1111 bool hasDPPBroadcasts() const {
1112 return HasDPP && getGeneration() < GFX10;
1113 }
1114
1116 return HasDPP && getGeneration() < GFX10;
1117 }
1118
1119 bool hasDPP8() const {
1120 return HasDPP8;
1121 }
1122
1123 bool hasDPALU_DPP() const {
1124 return HasDPALU_DPP;
1125 }
1126
1127 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1128
1129 bool hasPackedFP32Ops() const {
1130 return HasPackedFP32Ops;
1131 }
1132
1133 // Has V_PK_MOV_B32 opcode
1134 bool hasPkMovB32() const {
1135 return GFX90AInsts;
1136 }
1137
1139 return getGeneration() >= GFX10 || hasGFX940Insts();
1140 }
1141
1142 bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
1143
1144 bool hasImageInsts() const {
1145 return HasImageInsts;
1146 }
1147
1149 return HasExtendedImageInsts;
1150 }
1151
1152 bool hasR128A16() const {
1153 return HasR128A16;
1154 }
1155
1156 bool hasA16() const { return HasA16; }
1157
1158 bool hasG16() const { return HasG16; }
1159
1160 bool hasOffset3fBug() const {
1161 return HasOffset3fBug;
1162 }
1163
1165
1167
1168 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1169
1171
1173
1174 bool hasNSAEncoding() const { return HasNSAEncoding; }
1175
1176 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1177
1179
1180 unsigned getNSAMaxSize(bool HasSampler = false) const {
1181 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1182 }
1183
1184 bool hasGFX10_AEncoding() const {
1185 return GFX10_AEncoding;
1186 }
1187
1188 bool hasGFX10_BEncoding() const {
1189 return GFX10_BEncoding;
1190 }
1191
1192 bool hasGFX10_3Insts() const {
1193 return GFX10_3Insts;
1194 }
1195
1196 bool hasMadF16() const;
1197
1198 bool hasMovB64() const { return GFX940Insts || GFX1250Insts; }
1199
1200 bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }
1201
1202 // Scalar and global loads support scale_offset bit.
1203 bool hasScaleOffset() const { return GFX1250Insts; }
1204
1205 bool hasFlatGVSMode() const { return FlatGVSMode; }
1206
1207 // FLAT GLOBAL VOffset is signed
1208 bool hasSignedGVSOffset() const { return GFX1250Insts; }
1209
1210 bool enableSIScheduler() const {
1211 return EnableSIScheduler;
1212 }
1213
1214 bool loadStoreOptEnabled() const {
1215 return EnableLoadStoreOpt;
1216 }
1217
1218 bool hasSGPRInitBug() const {
1219 return SGPRInitBug;
1220 }
1221
1223 return UserSGPRInit16Bug && isWave32();
1224 }
1225
1227
1231
1234 }
1235
1239
1240 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1242 return CIInsts;
1243 }
1244
1247 }
1248
1253
1256 }
1257
1260 }
1261
1264 }
1265
1268 }
1269
1272 }
1273
1274 bool hasLDSMisalignedBug() const {
1275 return LDSMisalignedBug && !EnableCuMode;
1276 }
1277
1279 return HasInstFwdPrefetchBug;
1280 }
1281
1283 return HasVcmpxExecWARHazard;
1284 }
1285
1288 }
1289
1290 // Shift amount of a 64 bit shift cannot be a highest allocated register
1291 // if also at the end of the allocation block.
1293 return GFX90AInsts && !GFX940Insts;
1294 }
1295
1296 // Has one cycle hazard on transcendental instruction feeding a
1297 // non transcendental VALU.
1298 bool hasTransForwardingHazard() const { return GFX940Insts; }
1299
1300 // Has one cycle hazard on a VALU instruction partially writing dst with
1301 // a shift of result bits feeding another VALU instruction.
1303
1304 // Cannot use op_sel with v_dot instructions.
1305 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1306
1307 // Does not have HW interlocs for VALU writing and then reading SGPRs.
1308 bool hasVDecCoExecHazard() const {
1309 return GFX940Insts;
1310 }
1311
1312 bool hasNSAtoVMEMBug() const {
1313 return HasNSAtoVMEMBug;
1314 }
1315
1316 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1317
1318 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1319
1320 bool hasGFX90AInsts() const { return GFX90AInsts; }
1321
1323 return getGeneration() == GFX10;
1324 }
1325
1326 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1327
1328 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1329
1330 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1331
1333 return getGeneration() == GFX11;
1334 }
1335
1337
1339
1340 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1341
1343
1347
1348 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1349
1350 bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
1351
1353 return GFX1250Insts && getGeneration() == GFX12;
1354 }
1355
1356 /// Return if operations acting on VGPR tuples require even alignment.
1357 bool needsAlignedVGPRs() const { return RequiresAlignVGPR; }
1358
1359 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1360 bool hasSPackHL() const { return GFX11Insts; }
1361
1362 /// Return true if the target's EXP instruction has the COMPR flag, which
1363 /// affects the meaning of the EN (enable) bits.
1364 bool hasCompressedExport() const { return !GFX11Insts; }
1365
1366 /// Return true if the target's EXP instruction supports the NULL export
1367 /// target.
1368 bool hasNullExportTarget() const { return !GFX11Insts; }
1369
1370 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1371
1372 bool hasVOPDInsts() const { return HasVOPDInsts; }
1373
1375
1376 /// Return true if the target has the S_DELAY_ALU instruction.
1377 bool hasDelayAlu() const { return GFX11Insts; }
1378
1379 bool hasPackedTID() const { return HasPackedTID; }
1380
1381 // GFX94* is a derivation to GFX90A. hasGFX940Insts() being true implies that
1382 // hasGFX90AInsts is also true.
1383 bool hasGFX940Insts() const { return GFX940Insts; }
1384
1385 // GFX950 is a derivation to GFX94*. hasGFX950Insts() implies that
1386 // hasGFX940Insts and hasGFX90AInsts are also true.
1387 bool hasGFX950Insts() const { return GFX950Insts; }
1388
1389 /// Returns true if the target supports
1390 /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
1391 /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
1392 bool hasLDSLoadB96_B128() const {
1393 return hasGFX950Insts();
1394 }
1395
1396 bool hasVMemToLDSLoad() const { return HasVMemToLDSLoad; }
1397
1398 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1399
1401
1403
1405
1407
1408 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1409 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1410 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1411
1412 /// \returns true if inline constants are not supported for F16 pseudo
1413 /// scalar transcendentals.
1415 return getGeneration() == GFX12;
1416 }
1417
1418 /// \returns true if the target has instructions with xf32 format support.
1419 bool hasXF32Insts() const { return HasXF32Insts; }
1420
1421 /// \returns true if the target has packed f32 instructions that only read 32
1422 /// bits from a scalar operand (SGPR or literal) and replicates the bits to
1423 /// both channels.
1427
1428 bool hasBitOp3Insts() const { return HasBitOp3Insts; }
1429
1430 bool hasPermlane16Swap() const { return HasPermlane16Swap; }
1431 bool hasPermlane32Swap() const { return HasPermlane32Swap; }
1432 bool hasAshrPkInsts() const { return HasAshrPkInsts; }
1433
1436 }
1437
1440 }
1441
1442 bool hasMin3Max3PKF16() const { return HasMin3Max3PKF16; }
1443
1444 bool hasTanhInsts() const { return HasTanhInsts; }
1445
1447
1448 bool hasAddPC64Inst() const { return GFX1250Insts; }
1449
1451
1453
1456 }
1457
1459
1460 /// \returns true if the target supports expert scheduling mode 2 which relies
1461 /// on the compiler to insert waits to avoid hazards between VMEM and VALU
1462 /// instructions in some instances.
1463 bool hasExpertSchedulingMode() const { return getGeneration() >= GFX12; }
1464
1465 /// \returns true if the target has s_wait_xcnt insertion. Supported for
1466 /// GFX1250.
1467 bool hasWaitXCnt() const { return HasWaitXcnt; }
1468
1469 // A single DWORD instructions can use a 64-bit literal.
1470 bool has64BitLiterals() const { return Has64BitLiterals; }
1471
1473
1475
1476 /// \returns The maximum number of instructions that can be enclosed in an
1477 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1478 /// instruction.
1479 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1480
1481 bool hasPrngInst() const { return HasPrngInst; }
1482
1484
1485 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1486 /// SGPRs
1487 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1488
1489 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1490 /// VGPRs
1491 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,
1492 unsigned DynamicVGPRBlockSize) const;
1493
1494 /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
1495 /// be achieved when the only function running on a CU is \p F, each workgroup
1496 /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
1497 /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a
1498 /// range, so this returns a range as well.
1499 ///
1500 /// Note that occupancy can be affected by the scratch allocation as well, but
1501 /// we do not have enough information to compute it.
1502 std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
1503 unsigned LDSSize = 0,
1504 unsigned NumSGPRs = 0,
1505 unsigned NumVGPRs = 0) const;
1506
1507 /// \returns true if the flat_scratch register should be initialized with the
1508 /// pointer to the wave's scratch memory rather than a size and offset.
1511 }
1512
1513 /// \returns true if the flat_scratch register is initialized by the HW.
1514 /// In this case it is readonly.
1516
1517 /// \returns true if the architected SGPRs are enabled.
1519
1520 /// \returns true if Global Data Share is supported.
1521 bool hasGDS() const { return HasGDS; }
1522
1523 /// \returns true if Global Wave Sync is supported.
1524 bool hasGWS() const { return HasGWS; }
1525
1526 /// \returns true if the machine has merged shaders in which s0-s7 are
1527 /// reserved by the hardware and user SGPRs start at s8 (GFX9 and newer).
1528 bool hasMergedShaders() const {
1529 return getGeneration() >= GFX9;
1530 }
1531
1532 /// \returns true if the target supports the pre-NGG legacy geometry path.
1533 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1534
1535 /// \returns true if preloading kernel arguments is supported.
1536 bool hasKernargPreload() const { return KernargPreload; }
1537
1538 /// \returns true if the target has the split barriers feature.
1539 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1540
1541 /// \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1542 bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
1543
1544 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1545 // no-return form.
1547
1548 /// \returns true if the target has DX10_CLAMP kernel descriptor mode bit
1549 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1550
1551 /// \returns true if the target has IEEE kernel descriptor mode bit
1552 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1553
1554 // \returns true if the target has IEEE fminimum/fmaximum instructions
1556
1557 /// \returns true if the target has WG_RR_MODE kernel descriptor mode bit
1558 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1559
1560 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1561 /// values.
1562 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1563
/// \returns the GFX1250Insts feature flag.
1564 bool hasGFX1250Insts() const { return GFX1250Insts; }
1565
/// \returns true if the subtarget has GFX1250 instructions (GFX1250Insts),
/// which is the condition this query uses for VOPD3.
1566 bool hasVOPD3() const { return GFX1250Insts; }
1567
1568 /// \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.
1569 bool hasAddSubU64Insts() const { return HasAddSubU64Insts; }
1570
1571 /// \returns true if the target has the V_MAD_U32 instruction.
1572 bool hasMadU32Inst() const { return HasMadU32Inst; }
1573
1574 /// \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
1575 bool hasVectorMulU64() const { return GFX1250Insts; }
1576
1577 /// \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32
1578 /// instructions.
1579 bool hasMadU64U32NoCarry() const { return GFX1250Insts; }
1580
1581 /// \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
1582 bool hasIntMinMax64() const { return GFX1250Insts; }
1583
1584 /// \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
1585 bool hasAddMinMaxInsts() const { return HasAddMinMaxInsts; }
1586
1587 // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
1589
1590 /// \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
1591 bool hasPkMinMax3Insts() const { return GFX1250Insts; }
1592
1593 /// \returns true if the target has the S_GET_SHADER_CYCLES_U64 instruction.
1594 bool hasSGetShaderCyclesInst() const { return GFX1250Insts; }
1595
1596 // \returns true if target has S_SETPRIO_INC_WG instruction.
1598
1599 /// \returns true if the target has the S_WAKEUP_BARRIER instruction.
1600 bool hasSWakeupBarrier() const { return HasSWakeupBarrier; }
1601
1602 /// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1603 /// of sign-extending. Note that GFX1250 has not only fixed the bug but also
1604 /// extended VA to 57 bits.
1605 bool hasGetPCZeroExtension() const { return GFX12Insts && !GFX1250Insts; }
1606
1607 // \returns true if the target needs to create a prolog for backward
1608 // compatibility when preloading kernel arguments.
1610 return hasKernargPreload() && !GFX1250Insts;
1611 }
1612
/// \returns the GFX12Insts feature flag.
1613 bool hasCondSubInsts() const { return GFX12Insts; }
1614
/// \returns the result of hasGFX10_3Insts().
1615 bool hasSubClampInsts() const { return hasGFX10_3Insts(); }
1616
1617 /// \returns SGPR allocation granularity supported by the subtarget.
1618 unsigned getSGPRAllocGranule() const {
1620 }
1621
1622 /// \returns SGPR encoding granularity supported by the subtarget.
1623 unsigned getSGPREncodingGranule() const {
1625 }
1626
1627 /// \returns Total number of SGPRs supported by the subtarget.
1628 unsigned getTotalNumSGPRs() const {
1630 }
1631
1632 /// \returns Addressable number of SGPRs supported by the subtarget.
1633 unsigned getAddressableNumSGPRs() const {
1635 }
1636
1637 /// \returns Minimum number of SGPRs that meets the given number of waves per
1638 /// execution unit requirement supported by the subtarget.
1639 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1640 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1641 }
1642
1643 /// \returns Maximum number of SGPRs that meets the given number of waves per
1644 /// execution unit requirement supported by the subtarget.
1645 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1646 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1647 }
1648
1649 /// \returns Reserved number of SGPRs. This is common
1650 /// utility function called by MachineFunction and
1651 /// Function variants of getReservedNumSGPRs.
1652 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1653 /// \returns Reserved number of SGPRs for given machine function \p MF.
1654 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1655
1656 /// \returns Reserved number of SGPRs for given function \p F.
1657 unsigned getReservedNumSGPRs(const Function &F) const;
1658
1659 /// \returns Maximum number of preloaded SGPRs for the subtarget.
1660 unsigned getMaxNumPreloadedSGPRs() const;
1661
1662 /// \returns max num SGPRs. This is the common utility
1663 /// function called by MachineFunction and Function
1664 /// variants of getMaxNumSGPRs.
1665 unsigned getBaseMaxNumSGPRs(const Function &F,
1666 std::pair<unsigned, unsigned> WavesPerEU,
1667 unsigned PreloadedSGPRs,
1668 unsigned ReservedNumSGPRs) const;
1669
1670 /// \returns Maximum number of SGPRs that meets number of waves per execution
1671 /// unit requirement for function \p MF, or number of SGPRs explicitly
1672 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1673 ///
1674 /// \returns Value that meets number of waves per execution unit requirement
1675 /// if explicitly requested value cannot be converted to integer, violates
1676 /// subtarget's specifications, or does not meet number of waves per execution
1677 /// unit requirement.
1678 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1679
1680 /// \returns Maximum number of SGPRs that meets number of waves per execution
1681 /// unit requirement for function \p F, or number of SGPRs explicitly
1682 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1683 ///
1684 /// \returns Value that meets number of waves per execution unit requirement
1685 /// if explicitly requested value cannot be converted to integer, violates
1686 /// subtarget's specifications, or does not meet number of waves per execution
1687 /// unit requirement.
1688 unsigned getMaxNumSGPRs(const Function &F) const;
1689
1690 /// \returns VGPR allocation granularity supported by the subtarget.
1691 unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {
1692 return AMDGPU::IsaInfo::getVGPRAllocGranule(this, DynamicVGPRBlockSize);
1693 }
1694
1695 /// \returns VGPR encoding granularity supported by the subtarget.
1696 unsigned getVGPREncodingGranule() const {
1698 }
1699
1700 /// \returns Total number of VGPRs supported by the subtarget.
1701 unsigned getTotalNumVGPRs() const {
1703 }
1704
1705 /// \returns Addressable number of architectural VGPRs supported by the
1706 /// subtarget.
1710
1711 /// \returns Addressable number of VGPRs supported by the subtarget.
1712 unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
1713 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
1714 }
1715
1716 /// \returns the minimum number of VGPRs that will prevent achieving more than
1717 /// the specified number of waves \p WavesPerEU.
1718 unsigned getMinNumVGPRs(unsigned WavesPerEU,
1719 unsigned DynamicVGPRBlockSize) const {
1720 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU,
1721 DynamicVGPRBlockSize);
1722 }
1723
1724 /// \returns the maximum number of VGPRs that can be used and still achieve
1725 /// at least the specified number of waves \p WavesPerEU.
1726 unsigned getMaxNumVGPRs(unsigned WavesPerEU,
1727 unsigned DynamicVGPRBlockSize) const {
1728 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU,
1729 DynamicVGPRBlockSize);
1730 }
1731
1732 /// \returns max num VGPRs. This is the common utility function
1733 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1734 unsigned
1736 std::pair<unsigned, unsigned> NumVGPRBounds) const;
1737
1738 /// \returns Maximum number of VGPRs that meets number of waves per execution
1739 /// unit requirement for function \p F, or number of VGPRs explicitly
1740 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1741 ///
1742 /// \returns Value that meets number of waves per execution unit requirement
1743 /// if explicitly requested value cannot be converted to integer, violates
1744 /// subtarget's specifications, or does not meet number of waves per execution
1745 /// unit requirement.
1746 unsigned getMaxNumVGPRs(const Function &F) const;
1747
/// \returns Maximum number of AGPRs for function \p F. This simply forwards
/// to getMaxNumVGPRs(F), so the AGPR limit equals the VGPR limit.
1748 unsigned getMaxNumAGPRs(const Function &F) const {
1749 return getMaxNumVGPRs(F);
1750 }
1751
1752 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
1753 /// of waves per execution unit required for the function \p F.
1754 std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
1755
1756 /// \returns Maximum number of VGPRs that meets number of waves per execution
1757 /// unit requirement for function \p MF, or number of VGPRs explicitly
1758 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1759 ///
1760 /// \returns Value that meets number of waves per execution unit requirement
1761 /// if explicitly requested value cannot be converted to integer, violates
1762 /// subtarget's specifications, or does not meet number of waves per execution
1763 /// unit requirement.
1764 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1765
/// \returns true if wave32 is supported, i.e. the generation is GFX10 or
/// later.
1766 bool supportsWave32() const { return getGeneration() >= GFX10; }
1767
/// \returns true if wave64 is supported, i.e. the subtarget does not have
/// GFX1250 instructions.
1768 bool supportsWave64() const { return !hasGFX1250Insts(); }
1769
/// \returns true if the wavefront size of this subtarget is 32.
1770 bool isWave32() const {
1771 return getWavefrontSize() == 32;
1772 }
1773
/// \returns true if the wavefront size of this subtarget is 64.
1774 bool isWave64() const {
1775 return getWavefrontSize() == 64;
1776 }
1777
1778 /// \returns true if the wavefront size of this subtarget is reliably known.
1779 /// This is false only for a default target-cpu that does not have an explicit
1780 /// +wavefrontsize target feature.
1781 bool isWaveSizeKnown() const {
1782 return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
1783 hasFeature(AMDGPU::FeatureWavefrontSize64);
1784 }
1785
1787 return getRegisterInfo()->getBoolRC();
1788 }
1789
1790 /// \returns Maximum number of work groups per compute unit supported by the
1791 /// subtarget and limited by given \p FlatWorkGroupSize.
1792 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1793 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1794 }
1795
1796 /// \returns Minimum flat work group size supported by the subtarget.
1797 unsigned getMinFlatWorkGroupSize() const override {
1799 }
1800
1801 /// \returns Maximum flat work group size supported by the subtarget.
1802 unsigned getMaxFlatWorkGroupSize() const override {
1804 }
1805
1806 /// \returns Number of waves per execution unit required to support the given
1807 /// \p FlatWorkGroupSize.
1808 unsigned
1809 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1810 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1811 }
1812
1813 /// \returns Minimum number of waves per execution unit supported by the
1814 /// subtarget.
1815 unsigned getMinWavesPerEU() const override {
1817 }
1818
1819 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1820 SDep &Dep,
1821 const TargetSchedModel *SchedModel) const override;
1822
1823 /// \returns true if it's beneficial on this subtarget for the scheduler to
1824 /// cluster stores as well as loads.
1825 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1826
1827 /// \returns the number of address arguments from which to enable MIMG NSA
1828 /// on supported architectures.
1829 unsigned getNSAThreshold(const MachineFunction &MF) const;
1830
1831 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1832 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1834
1835 // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
1836 // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
1838
/// \returns the DynamicVGPR feature flag.
1839 bool isDynamicVGPREnabled() const { return DynamicVGPR; }
/// \returns the dynamic VGPR block size: 32 when DynamicVGPRBlockSize32 is
/// set, otherwise 16.
1840 unsigned getDynamicVGPRBlockSize() const {
1841 return DynamicVGPRBlockSize32 ? 32 : 16;
1842 }
1843
1845 // AMDGPU doesn't care if early-clobber and undef operands are allocated
1846 // to the same register.
1847 return false;
1848 }
1849
1850 // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
1851 // and surrounded by S_WAIT_ALU(0xFFE3).
1853 return getGeneration() == GFX12;
1854 }
1855
1856 // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
1857 // read.
1859 return GFX1250Insts && getGeneration() == GFX12;
1860 }
1861
1862 // src_flat_scratch_hi cannot be used as a source in SALU producing a 64-bit
1863 // result.
1865 return GFX1250Insts && getGeneration() == GFX12;
1866 }
1867
1868 /// \returns true if the subtarget supports clusters of workgroups.
1869 bool hasClusters() const { return HasClusters; }
1870
1871 /// \returns true if the subtarget requires a wait for xcnt before VMEM
1872 /// accesses that must never be repeated in the event of a page fault/re-try.
1873 /// Atomic stores/rmw and all volatile accesses fall under these criteria.
1877
1878 /// \returns the number of significant bits in the immediate field of the
1879 /// S_NOP instruction.
1880 unsigned getSNopBits() const {
1882 return 7;
1884 return 4;
1885 return 3;
1886 }
1887
1888 /// \returns true if the sub-target supports buffer resource (V#) with 45-bit
1889 /// num_records.
1893
1897
1901
1903 return (getGeneration() <= AMDGPUSubtarget::GFX9 ||
1905 isWave32();
1906 }
1907};
1908
1910public:
/// \returns the ImplicitBufferPtr flag.
1911 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1912
/// \returns the PrivateSegmentBuffer flag.
1913 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1914
/// \returns the DispatchPtr flag.
1915 bool hasDispatchPtr() const { return DispatchPtr; }
1916
/// \returns the QueuePtr flag.
1917 bool hasQueuePtr() const { return QueuePtr; }
1918
/// \returns the KernargSegmentPtr flag.
1919 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1920
/// \returns the DispatchID flag.
1921 bool hasDispatchID() const { return DispatchID; }
1922
/// \returns the FlatScratchInit flag.
1923 bool hasFlatScratchInit() const { return FlatScratchInit; }
1924
/// \returns the PrivateSegmentSize flag.
1925 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1926
/// \returns the current value of the NumKernargPreloadSGPRs counter.
1927 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1928
/// \returns the current value of the NumUsedUserSGPRs counter.
1929 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1930
1931 unsigned getNumFreeUserSGPRs();
1932
1933 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1934
1945
1946 // Returns the size in number of SGPRs for preload user SGPR field.
1948 switch (ID) {
1950 return 2;
1952 return 4;
1953 case DispatchPtrID:
1954 return 2;
1955 case QueuePtrID:
1956 return 2;
1958 return 2;
1959 case DispatchIdID:
1960 return 2;
1961 case FlatScratchInitID:
1962 return 2;
1964 return 1;
1965 }
1966 llvm_unreachable("Unknown UserSGPRID.");
1967 }
1968
1969 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1970
1971private:
1972 const GCNSubtarget &ST;
1973
1974 // Private memory buffer
1975 // Compute directly in sgpr[0:1]
1976 // Other shaders indirect 64-bits at sgpr[0:1]
1977 bool ImplicitBufferPtr = false;
1978
1979 bool PrivateSegmentBuffer = false;
1980
1981 bool DispatchPtr = false;
1982
1983 bool QueuePtr = false;
1984
1985 bool KernargSegmentPtr = false;
1986
1987 bool DispatchID = false;
1988
1989 bool FlatScratchInit = false;
1990
1991 bool PrivateSegmentSize = false;
1992
1993 unsigned NumKernargPreloadSGPRs = 0;
1994
1995 unsigned NumUsedUserSGPRs = 0;
1996};
1997
1998} // end namespace llvm
1999
2000#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
#define F(x, y, z)
Definition MD5.cpp:54
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
bool hasMemoryAtomicFaddF32DenormalSupport() const
bool hasFlat() const
bool hasD16Images() const
bool hasMinimum3Maximum3F32() const
InstrItineraryData InstrItins
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
bool hasSDWAOmod() const
bool hasFlatGVSMode() const
bool hasPermlane32Swap() const
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
bool hasSwap() const
bool hasPkFmacF16Inst() const
bool HasAtomicFMinFMaxF64FlatInsts
bool hasPkMinMax3Insts() const
bool hasDot2Insts() const
bool hasD16LoadStore() const
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
bool hasScalarCompareEq64() const
bool has1_5xVGPRs() const
int getLDSBankCount() const
bool hasSafeCUPrefetch() const
bool hasOnlyRevVALUShifts() const
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
bool hasExpertSchedulingMode() const
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
bool hasLdsAtomicAddF64() const
bool hasFlatLgkmVMemCountInOrder() const
bool Has45BitNumRecordsBufferResource
bool flatScratchIsPointer() const
bool hasSDWAMac() const
bool hasFP8ConversionInsts() const
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
bool hasApertureRegs() const
unsigned MaxPrivateElementSize
bool unsafeDSOffsetFoldingEnabled() const
bool hasBitOp3Insts() const
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
bool vmemWriteNeedsExpWaitcnt() const
bool hasAtomicFMinFMaxF32FlatInsts() const
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
bool hasIEEEMinimumMaximumInsts() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
bool hasGetWaveIdInst() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasFlatScratchHiInB64InstHazard() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
bool hasRelaxedBufferOOBMode() const
bool hasPkAddMinMaxInsts() const
bool hasDLInsts() const
bool hasExtendedImageInsts() const
bool hasVmemWriteVgprInOrder() const
unsigned getSNopBits() const
bool hasMAIInsts() const
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
bool has1024AddressableVGPRs() const
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
bool hasFlatScratchInsts() const
bool hasMultiDwordFlatScratchAddressing() const
bool hasArchitectedSGPRs() const
bool hasFmaakFmamkF64Insts() const
bool hasTanhInsts() const
bool hasHWFP64() const
bool hasScaleOffset() const
bool hasDenormModeInst() const
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
bool hasCvtScaleForwardingHazard() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
bool hasUnalignedDSAccessEnabled() const
bool hasTensorCvtLutInsts() const
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
bool hasSWakeupBarrier() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasDot1Insts() const
bool hasDot3Insts() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
bool hasCondSubInsts() const
const InlineAsmLowering * getInlineAsmLowering() const override
bool hasAutoWaitcntBeforeBarrier() const
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
unsigned getTotalNumSGPRs() const
bool hasGFX1250Insts() const
const InstrItineraryData * getInstrItineraryData() const override
bool hasSafeSmemPrefetch() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool HasShaderCyclesHiLoRegisters
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
bool privateMemoryResourceIsRangeChecked() const
bool hasScalarSubwordLoads() const
bool hasDot11Insts() const
bool enableFlatScratch() const
bool hasMadF16() const
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const
bool hasMin3Max3PKF16() const
bool hasUnalignedBufferAccess() const
bool hasR128A16() const
bool hasCvtPkNormVOP3Insts() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasPrngInst() const
bool hasSignedScratchOffsets() const
bool hasGlobalAddTidInsts() const
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
bool hasFmaMixBF16Insts() const
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool hasVMemToLDSLoad() const
bool supportsGetDoorbellID() const
bool supportsWave32() const
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
bool hasFlatAtomicFaddF32Inst() const
bool hasKernargPreload() const
bool hasFP8Insts() const
unsigned getMaxNumAGPRs(const Function &F) const
bool hasReadM0MovRelInterpHazard() const
bool isDynamicVGPREnabled() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
bool hasFmaakFmamkF32Insts() const
bool hasClusters() const
bool hasVscnt() const
bool hasMad64_32() const
bool hasSetregVGPRMSBFixup() const
InstructionSelector * getInstructionSelector() const override
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
bool hasHardClauses() const
bool useDS128() const
bool hasExtendedWaitCounts() const
bool hasBVHDualAndBVH8Insts() const
bool hasMinimum3Maximum3PKF16() const
bool hasLshlAddU64Inst() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
bool hasFmacF64Inst() const
bool RequiresWaitsBeforeSystemScopeStores
bool hasXF32Insts() const
bool hasInstPrefetch() const
bool hasAddPC64Inst() const
unsigned maxHardClauseLength() const
bool hasAshrPkInsts() const
bool isMesaGfxShader(const Function &F) const
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
bool hasDPP() const
bool hasVINTERPEncoding() const
bool hasGloballyAddressableScratch() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasAddSubU64Insts() const
bool hasLegacyGeometry() const
bool has64BitLiterals() const
TrapHandlerAbi getTrapHandlerAbi() const
bool isCuModeEnabled() const
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
bool hasUnalignedScratchAccess() const
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasMinimum3Maximum3F16() const
bool hasSDWAOutModsVOPC() const
bool hasAtomicFMinFMaxF32GlobalInsts() const
bool hasMcastLoadInsts() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
bool hasLdsBarrierArriveAtomic() const
bool hasGFX950Insts() const
bool hasCvtNormInsts() const
bool has45BitNumRecordsBufferResource() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
unsigned getMaxNumPreloadedSGPRs() const
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
bool hasNoDataDepHazard() const
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool useVGPRBlockOpsForCSR() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool hasUnalignedDSAccess() const
bool hasAddMinMaxInsts() const
bool needsKernArgPreloadProlog() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
bool hasIntClamp() const
bool hasGFX10_AEncoding() const
bool hasFP8E5M3Insts() const
bool hasFlatSegmentOffsetBug() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const
bool hasEmulatedSystemScopeAtomics() const
bool hasMadU64U32NoCarry() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
const SITargetLowering * getTargetLowering() const override
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
bool hasGFX940Insts() const
bool hasFullRate64Ops() const
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
bool enableMachineScheduler() const override
bool hasLDSFPAtomicAddF64() const
bool hasFlatGlobalInsts() const
bool HasGloballyAddressableScratch
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
bool getScalarizeGlobalBehavior() const
bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const
bool HasAtomicFMinFMaxF32FlatInsts
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasScratchBaseForwardingHazard() const
bool hasLerpInst() const
bool hasIntMinMax64() const
bool hasShaderCyclesHiLoRegisters() const
bool hasSDWASdst() const
bool HasDefaultComponentBroadcast
bool hasScalarPackInsts() const
bool hasNSAEncoding() const
bool requiresDisjointEarlyClobberAndUndef() const override
bool hasVALUReadSGPRHazard() const
bool hasSMemRealTime() const
bool hasFlatAddressSpace() const
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
bool hasMovB64() const
bool hasVmemPrefInsts() const
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const
bool supportsWaveWideBPermute() const
bool hasCubeInsts() const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
bool hasMed3_16() const
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasUnalignedScratchAccessEnabled() const
bool hasMovrel() const
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
bool hasDot13Insts() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
bool HasAtomicBufferGlobalPkAddF16Insts
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
bool hasUnalignedBufferAccessEnabled() const
bool isWaveSizeKnown() const
Returns if the wavesize of this subtarget is known reliable.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool hasFMA() const
bool hasDot10Insts() const
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
bool supportsWave64() const
bool HasAtomicBufferPkAddBF16Inst
bool hasNegativeUnalignedScratchOffsetBug() const
bool supportsBPermute() const
bool hasFormattedMUBUFInsts() const
bool hasFlatScratchSVSMode() const
bool supportsWGP() const
bool hasG16() const
bool hasHalfRate64Ops() const
bool hasAtomicFaddInsts() const
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
bool hasSubClampInsts() const
bool hasPermlane16Swap() const
bool hasNSAtoVMEMBug() const
bool requiresWaitXCntForSingleAccessInstructions() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
bool hasSadInsts() const
bool hasMIMG_R128() const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
unsigned getMaxNumUserSGPRs() const
bool hasTransposeLoadF4F6Insts() const
bool hasMadU32Inst() const
bool hasAtomicFaddNoRtnInsts() const
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
bool HasEmulatedSystemScopeAtomics
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasSignedGVSOffset() const
bool hasCvtPkNormVOP2Insts() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
bool hasVectorMulU64() const
bool hasScalarMulHiInsts() const
bool hasSCmpK() const
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
bool requiresWaitIdleBeforeGetReg() const
bool hasPointSampleAccel() const
bool hasDot12Insts() const
bool hasDS96AndDS128() const
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Generation getGeneration() const
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
bool hasAtomicBufferGlobalPkAddF16Insts() const
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit required for the function F.
bool hasNoSdstCMPX() const
bool isXNACKEnabled() const
bool hasScalarAddSub64() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
bool enableEarlyIfConversion() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by an SMRD instruction requires 4 wait states when the SGPR was written by a VALU instruction.
bool hasSGetShaderCyclesInst() const
bool hasRFEHazards() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU instruction.
bool hasFlatScratchSTMode() const
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
bool hasAddr64() const
bool HasAtomicGlobalPkAddBF16Inst
bool hasUnalignedAccessMode() const
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool isWave64() const
unsigned getDynamicVGPRBlockSize() const
bool hasFmaMixInsts() const
bool hasQsadInsts() const
bool hasPackedTID() const
bool setRegModeNeedsVNOPs() const
bool hasFP64() const
bool hasAddNoCarry() const
bool requiresWaitsBeforeSystemScopeStores() const
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
bool hasSALUFloatInsts() const
bool EnableUnsafeDSOffsetFolding
bool hasFractBug() const
bool isPreciseMemoryEnabled() const
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
bool HasMemoryAtomicFaddF32DenormalSupport
bool hasMTBUFInsts() const
bool hasDot4Insts() const
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
bool hasWaitXCnt() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
bool hasSetPrioIncWgInst() const
~GCNSubtarget() override
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasDot9Insts() const
bool hasVOPD3() const
bool hasAtomicCSub() const
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
bool hasDefaultComponentBroadcast() const
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
bool HasFlatBufferGlobalAtomicFaddF64Inst
static unsigned getNumUserSGPRForField(UserSGPRID ID)
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
unsigned getNumKernargPreloadSGPRs() const
unsigned getNumUsedUserSGPRs() const
GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST)
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition ScheduleDAG.h:51
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
A region of an MBB for scheduling.