LLVM 22.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Selects the trap handler ABI in use. The values are documented in the
/// trap-handler-abi section of the AMDGPU usage guide.
enum class TrapHandlerAbi {
  NONE = 0,
  AMDHSA = 1,
};
44
45 enum class TrapID {
48 };
49
50private:
51 /// SelectionDAGISel related APIs.
52 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
53
54 /// GlobalISel related APIs.
55 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
56 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
57 std::unique_ptr<InstructionSelector> InstSelector;
58 std::unique_ptr<LegalizerInfo> Legalizer;
59 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
60
61protected:
62 // Basic subtarget description.
64 unsigned Gen = INVALID;
66 int LDSBankCount = 0;
68
69 // Possibly statically set by tablegen, but may want to be overridden.
70 bool FastDenormalF32 = false;
71 bool HalfRate64Ops = false;
72 bool FullRate64Ops = false;
73
74 // Dynamically set bits that enable features.
75 bool FlatForGlobal = false;
77 bool BackOffBarrier = false;
79 bool UnalignedAccessMode = false;
81 bool HasApertureRegs = false;
82 bool SupportsXNACK = false;
83 bool KernargPreload = false;
84
85 // This should not be used directly. 'TargetID' tracks the dynamic settings
86 // for XNACK.
87 bool EnableXNACK = false;
88
89 bool EnableTgSplit = false;
90 bool EnableCuMode = false;
91 bool TrapHandler = false;
92 bool EnablePreciseMemory = false;
93
94 // Used as options.
95 bool EnableLoadStoreOpt = false;
97 bool EnableSIScheduler = false;
98 bool EnableDS128 = false;
99 bool EnablePRTStrictNull = false;
100 bool DumpCode = false;
102
103 // Subtarget properties statically set by tablegen
104 bool FP64 = false;
105 bool FMA = false;
106 bool MIMG_R128 = false;
107 bool CIInsts = false;
108 bool GFX8Insts = false;
109 bool GFX9Insts = false;
110 bool GFX90AInsts = false;
111 bool GFX940Insts = false;
112 bool GFX950Insts = false;
113 bool GFX10Insts = false;
114 bool GFX11Insts = false;
115 bool GFX12Insts = false;
116 bool GFX1250Insts = false;
117 bool GFX10_3Insts = false;
118 bool GFX7GFX8GFX9Insts = false;
119 bool SGPRInitBug = false;
120 bool UserSGPRInit16Bug = false;
123 bool HasSMemRealTime = false;
124 bool HasIntClamp = false;
125 bool HasFmaMixInsts = false;
126 bool HasFmaMixBF16Insts = false;
127 bool HasMovrel = false;
128 bool HasVGPRIndexMode = false;
130 bool HasScalarStores = false;
131 bool HasScalarAtomics = false;
132 bool HasSDWAOmod = false;
133 bool HasSDWAScalar = false;
134 bool HasSDWASdst = false;
135 bool HasSDWAMac = false;
136 bool HasSDWAOutModsVOPC = false;
137 bool HasDPP = false;
138 bool HasDPP8 = false;
139 bool HasDPALU_DPP = false;
140 bool HasDPPSrc1SGPR = false;
141 bool HasPackedFP32Ops = false;
142 bool HasImageInsts = false;
144 bool HasR128A16 = false;
145 bool HasA16 = false;
146 bool HasG16 = false;
147 bool HasNSAEncoding = false;
149 bool GFX10_AEncoding = false;
150 bool GFX10_BEncoding = false;
151 bool HasDLInsts = false;
152 bool HasFmacF64Inst = false;
153 bool HasDot1Insts = false;
154 bool HasDot2Insts = false;
155 bool HasDot3Insts = false;
156 bool HasDot4Insts = false;
157 bool HasDot5Insts = false;
158 bool HasDot6Insts = false;
159 bool HasDot7Insts = false;
160 bool HasDot8Insts = false;
161 bool HasDot9Insts = false;
162 bool HasDot10Insts = false;
163 bool HasDot11Insts = false;
164 bool HasDot12Insts = false;
165 bool HasDot13Insts = false;
166 bool HasMAIInsts = false;
167 bool HasFP8Insts = false;
169 bool HasFP8E5M3Insts = false;
170 bool HasCvtFP8Vop1Bug = false;
171 bool HasPkFmacF16Inst = false;
192 bool HasXF32Insts = false;
193 /// The maximum number of instructions that may be placed within an S_CLAUSE,
194 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
195 /// indicates a lack of S_CLAUSE support.
197 bool SupportsSRAMECC = false;
198 bool DynamicVGPR = false;
200 bool HasVMemToLDSLoad = false;
201 bool RequiresAlignVGPR = false;
202
203 // This should not be used directly. 'TargetID' tracks the dynamic settings
204 // for SRAMECC.
205 bool EnableSRAMECC = false;
206
207 bool HasNoSdstCMPX = false;
208 bool HasVscnt = false;
209 bool HasWaitXcnt = false;
210 bool HasGetWaveIdInst = false;
211 bool HasSMemTimeInst = false;
214 bool HasVOP3Literal = false;
215 bool HasNoDataDepHazard = false;
216 bool FlatAddressSpace = false;
217 bool FlatInstOffsets = false;
218 bool FlatGlobalInsts = false;
219 bool FlatScratchInsts = false;
220 bool FlatGVSMode = false;
223 bool EnableFlatScratch = false;
225 bool HasGDS = false;
226 bool HasGWS = false;
227 bool AddNoCarryInsts = false;
228 bool HasUnpackedD16VMem = false;
229 bool LDSMisalignedBug = false;
232 bool UnalignedDSAccess = false;
233 bool HasPackedTID = false;
234 bool ScalarizeGlobal = false;
235 bool HasSALUFloatInsts = false;
238 bool Has64BitLiterals = false;
240 bool HasBitOp3Insts = false;
241 bool HasTanhInsts = false;
244 bool HasPrngInst = false;
246 bool HasPermlane16Swap = false;
247 bool HasPermlane32Swap = false;
252 bool HasVmemPrefInsts = false;
254 bool HasSafeCUPrefetch = false;
257 bool HasNSAtoVMEMBug = false;
258 bool HasNSAClauseBug = false;
259 bool HasOffset3fBug = false;
265 bool Has1_5xVGPRs = false;
266 bool HasMADIntraFwdBug = false;
267 bool HasVOPDInsts = false;
271 bool HasAshrPkInsts = false;
275 bool HasMin3Max3PKF16 = false;
277 bool HasLshlAddU64Inst = false;
278 bool HasAddSubU64Insts = false;
279 bool HasMadU32Inst = false;
280 bool HasAddMinMaxInsts = false;
285
286 bool RequiresCOV6 = false;
289
291
292 bool HasClusters = false;
294
295 // Dummy feature to use for assembler in tablegen.
296 bool FeatureDisable = false;
297
298private:
299 SIInstrInfo InstrInfo;
300 SITargetLowering TLInfo;
301 SIFrameLowering FrameLowering;
302
303public:
304 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
305 const GCNTargetMachine &TM);
306 ~GCNSubtarget() override;
307
309 StringRef GPU, StringRef FS);
310
311 /// Diagnose inconsistent subtarget features before attempting to codegen
312 /// function \p F.
313 void checkSubtargetFeatures(const Function &F) const;
314
315 const SIInstrInfo *getInstrInfo() const override {
316 return &InstrInfo;
317 }
318
319 const SIFrameLowering *getFrameLowering() const override {
320 return &FrameLowering;
321 }
322
323 const SITargetLowering *getTargetLowering() const override {
324 return &TLInfo;
325 }
326
327 const SIRegisterInfo *getRegisterInfo() const override {
328 return &InstrInfo.getRegisterInfo();
329 }
330
331 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
332
333 const CallLowering *getCallLowering() const override {
334 return CallLoweringInfo.get();
335 }
336
337 const InlineAsmLowering *getInlineAsmLowering() const override {
338 return InlineAsmLoweringInfo.get();
339 }
340
342 return InstSelector.get();
343 }
344
345 const LegalizerInfo *getLegalizerInfo() const override {
346 return Legalizer.get();
347 }
348
349 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
350 return RegBankInfo.get();
351 }
352
354 return TargetID;
355 }
356
358 return &InstrItins;
359 }
360
362
364 return (Generation)Gen;
365 }
366
367 unsigned getMaxWaveScratchSize() const {
368 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
369 if (getGeneration() >= GFX12) {
370 // 18-bit field in units of 64-dword.
371 return (64 * 4) * ((1 << 18) - 1);
372 }
373 if (getGeneration() == GFX11) {
374 // 15-bit field in units of 64-dword.
375 return (64 * 4) * ((1 << 15) - 1);
376 }
377 // 13-bit field in units of 256-dword.
378 return (256 * 4) * ((1 << 13) - 1);
379 }
380
381 /// Return the number of high bits known to be zero for a frame index.
385
386 int getLDSBankCount() const {
387 return LDSBankCount;
388 }
389
390 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
391 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
392 }
393
394 unsigned getConstantBusLimit(unsigned Opcode) const;
395
396 /// Returns if the result of this instruction with a 16-bit result returned in
397 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
398 /// the original value.
399 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
400
401 bool supportsWGP() const {
402 if (GFX1250Insts)
403 return false;
404 return getGeneration() >= GFX10;
405 }
406
407 bool hasIntClamp() const {
408 return HasIntClamp;
409 }
410
411 bool hasFP64() const {
412 return FP64;
413 }
414
415 bool hasMIMG_R128() const {
416 return MIMG_R128;
417 }
418
419 bool hasHWFP64() const {
420 return FP64;
421 }
422
423 bool hasHalfRate64Ops() const {
424 return HalfRate64Ops;
425 }
426
427 bool hasFullRate64Ops() const {
428 return FullRate64Ops;
429 }
430
431 bool hasAddr64() const {
433 }
434
435 bool hasFlat() const {
437 }
438
439 // Return true if the target only has the reverse operand versions of VALU
440 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
441 bool hasOnlyRevVALUShifts() const {
443 }
444
445 bool hasFractBug() const {
447 }
448
449 bool hasBFE() const {
450 return true;
451 }
452
453 bool hasBFI() const {
454 return true;
455 }
456
457 bool hasBFM() const {
458 return hasBFE();
459 }
460
461 bool hasBCNT(unsigned Size) const {
462 return true;
463 }
464
465 bool hasFFBL() const {
466 return true;
467 }
468
469 bool hasFFBH() const {
470 return true;
471 }
472
473 bool hasMed3_16() const {
475 }
476
477 bool hasMin3Max3_16() const {
479 }
480
481 bool hasFmaMixInsts() const {
482 return HasFmaMixInsts;
483 }
484
485 bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; }
486
487 bool hasCARRY() const {
488 return true;
489 }
490
491 bool hasFMA() const {
492 return FMA;
493 }
494
495 bool hasSwap() const {
496 return GFX9Insts;
497 }
498
499 bool hasScalarPackInsts() const {
500 return GFX9Insts;
501 }
502
503 bool hasScalarMulHiInsts() const {
504 return GFX9Insts;
505 }
506
507 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
508
512
514 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
515 return getGeneration() >= GFX9;
516 }
517
518 /// True if the offset field of DS instructions works as expected. On SI, the
519 /// offset uses a 16-bit adder and does not always wrap properly.
520 bool hasUsableDSOffset() const {
521 return getGeneration() >= SEA_ISLANDS;
522 }
523
527
528 /// Condition output from div_scale is usable.
532
533 /// Extra wait hazard is needed in some cases before
534 /// s_cbranch_vccnz/s_cbranch_vccz.
535 bool hasReadVCCZBug() const {
536 return getGeneration() <= SEA_ISLANDS;
537 }
538
539 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
541 return getGeneration() >= GFX10;
542 }
543
544 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
545 /// was written by a VALU instruction.
548 }
549
550 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
551 /// SGPR was written by a VALU Instruction.
554 }
555
556 bool hasRFEHazards() const {
558 }
559
560 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
561 unsigned getSetRegWaitStates() const {
562 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
563 }
564
565 bool dumpCode() const {
566 return DumpCode;
567 }
568
569 /// Return the amount of LDS that can be used that will not restrict the
570 /// occupancy lower than WaveCount.
571 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
572 const Function &) const;
573
576 }
577
578 /// \returns If target supports S_DENORM_MODE.
579 bool hasDenormModeInst() const {
581 }
582
583 bool useFlatForGlobal() const {
584 return FlatForGlobal;
585 }
586
587 /// \returns If target supports ds_read/write_b128 and user enables generation
588 /// of ds_read/write_b128.
589 bool useDS128() const {
590 return CIInsts && EnableDS128;
591 }
592
593 /// \return If target supports ds_read/write_b96/128.
594 bool hasDS96AndDS128() const {
595 return CIInsts;
596 }
597
598 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
599 bool haveRoundOpsF64() const {
600 return CIInsts;
601 }
602
603 /// \returns If MUBUF instructions always perform range checking, even for
604 /// buffer resources used for private memory access.
608
609 /// \returns If target requires PRT Strict NULL support (zero result registers
610 /// for sparse texture support).
611 bool usePRTStrictNull() const {
612 return EnablePRTStrictNull;
613 }
614
618
619 /// \returns true if the target supports backing off of s_barrier instructions
620 /// when an exception is raised.
622 return BackOffBarrier;
623 }
624
627 }
628
632
633 bool hasUnalignedDSAccess() const {
634 return UnalignedDSAccess;
635 }
636
640
643 }
644
648
650 return UnalignedAccessMode;
651 }
652
654
655 bool hasApertureRegs() const {
656 return HasApertureRegs;
657 }
658
659 bool isTrapHandlerEnabled() const {
660 return TrapHandler;
661 }
662
663 bool isXNACKEnabled() const {
664 return TargetID.isXnackOnOrAny();
665 }
666
667 bool isTgSplitEnabled() const {
668 return EnableTgSplit;
669 }
670
671 bool isCuModeEnabled() const {
672 return EnableCuMode;
673 }
674
676
677 bool hasFlatAddressSpace() const {
678 return FlatAddressSpace;
679 }
680
681 bool hasFlatScrRegister() const {
682 return hasFlatAddressSpace();
683 }
684
685 bool hasFlatInstOffsets() const {
686 return FlatInstOffsets;
687 }
688
689 bool hasFlatGlobalInsts() const {
690 return FlatGlobalInsts;
691 }
692
693 bool hasFlatScratchInsts() const {
694 return FlatScratchInsts;
695 }
696
697 // Check if target supports ST addressing mode with FLAT scratch instructions.
698 // The ST addressing mode means no registers are used, either VGPR or SGPR,
699 // but only immediate offset is swizzled and added to the FLAT scratch base.
700 bool hasFlatScratchSTMode() const {
702 }
703
704 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
705
708 }
709
710 bool enableFlatScratch() const {
711 return flatScratchIsArchitected() ||
713 }
714
715 bool hasGlobalAddTidInsts() const {
716 return GFX10_BEncoding;
717 }
718
719 bool hasAtomicCSub() const {
720 return GFX10_BEncoding;
721 }
722
723 bool hasMTBUFInsts() const { return !hasGFX1250Insts(); }
724
725 bool hasFormattedMUBUFInsts() const { return !hasGFX1250Insts(); }
726
727 bool hasExportInsts() const {
728 return !hasGFX940Insts() && !hasGFX1250Insts();
729 }
730
731 bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
732
733 // DS_ADD_F64/DS_ADD_RTN_F64
734 bool hasLdsAtomicAddF64() const {
735 return hasGFX90AInsts() || hasGFX1250Insts();
736 }
737
739 return getGeneration() >= GFX9;
740 }
741
744 }
745
747 return getGeneration() > GFX9;
748 }
749
750 bool hasD16LoadStore() const {
751 return getGeneration() >= GFX9;
752 }
753
755 return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
756 }
757
758 bool hasD16Images() const {
760 }
761
762 /// Return if most LDS instructions have an m0 use that require m0 to be
763 /// initialized.
764 bool ldsRequiresM0Init() const {
765 return getGeneration() < GFX9;
766 }
767
768 // True if the hardware rewinds and replays GWS operations if a wave is
769 // preempted.
770 //
771 // If this is false, a GWS operation requires testing if a nack set the
772 // MEM_VIOL bit, and repeating if so.
773 bool hasGWSAutoReplay() const {
774 return getGeneration() >= GFX9;
775 }
776
777 /// \returns if target has ds_gws_sema_release_all instruction.
778 bool hasGWSSemaReleaseAll() const {
779 return CIInsts;
780 }
781
782 /// \returns true if the target has integer add/sub instructions that do not
783 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
784 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
785 /// for saturation.
786 bool hasAddNoCarry() const {
787 return AddNoCarryInsts;
788 }
789
790 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
791
792 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
793
794 bool hasUnpackedD16VMem() const {
795 return HasUnpackedD16VMem;
796 }
797
798 // Covers VS/PS/CS graphics shaders
799 bool isMesaGfxShader(const Function &F) const {
800 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
801 }
802
803 bool hasMad64_32() const {
804 return getGeneration() >= SEA_ISLANDS;
805 }
806
807 bool hasSDWAOmod() const {
808 return HasSDWAOmod;
809 }
810
811 bool hasSDWAScalar() const {
812 return HasSDWAScalar;
813 }
814
815 bool hasSDWASdst() const {
816 return HasSDWASdst;
817 }
818
819 bool hasSDWAMac() const {
820 return HasSDWAMac;
821 }
822
823 bool hasSDWAOutModsVOPC() const {
824 return HasSDWAOutModsVOPC;
825 }
826
827 bool hasDLInsts() const {
828 return HasDLInsts;
829 }
830
831 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
832
833 bool hasDot1Insts() const {
834 return HasDot1Insts;
835 }
836
837 bool hasDot2Insts() const {
838 return HasDot2Insts;
839 }
840
841 bool hasDot3Insts() const {
842 return HasDot3Insts;
843 }
844
845 bool hasDot4Insts() const {
846 return HasDot4Insts;
847 }
848
849 bool hasDot5Insts() const {
850 return HasDot5Insts;
851 }
852
853 bool hasDot6Insts() const {
854 return HasDot6Insts;
855 }
856
857 bool hasDot7Insts() const {
858 return HasDot7Insts;
859 }
860
861 bool hasDot8Insts() const {
862 return HasDot8Insts;
863 }
864
865 bool hasDot9Insts() const {
866 return HasDot9Insts;
867 }
868
869 bool hasDot10Insts() const {
870 return HasDot10Insts;
871 }
872
873 bool hasDot11Insts() const {
874 return HasDot11Insts;
875 }
876
877 bool hasDot12Insts() const {
878 return HasDot12Insts;
879 }
880
881 bool hasDot13Insts() const {
882 return HasDot13Insts;
883 }
884
885 bool hasMAIInsts() const {
886 return HasMAIInsts;
887 }
888
889 bool hasFP8Insts() const {
890 return HasFP8Insts;
891 }
892
894
895 bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
896
897 bool hasPkFmacF16Inst() const {
898 return HasPkFmacF16Inst;
899 }
900
904
908
912
916
918
920
924
926
928
932
936
940
944
946
947 /// \return true if the target has flat, global, and buffer atomic fadd for
948 /// double.
952
953 /// \return true if the target's flat, global, and buffer atomic fadd for
954 /// float supports denormal handling.
958
959 /// \return true if atomic operations targeting fine-grained memory work
960 /// correctly at device scope, in allocations in host or peer PCIe device
961 /// memory.
965
966 /// \return true if HW emulates system scope atomics unsupported by PCIe
967 /// via a CAS loop.
971
973
977
978 bool hasNoSdstCMPX() const {
979 return HasNoSdstCMPX;
980 }
981
982 bool hasVscnt() const {
983 return HasVscnt;
984 }
985
986 bool hasGetWaveIdInst() const {
987 return HasGetWaveIdInst;
988 }
989
990 bool hasSMemTimeInst() const {
991 return HasSMemTimeInst;
992 }
993
996 }
997
1001
1002 bool hasVOP3Literal() const {
1003 return HasVOP3Literal;
1004 }
1005
1006 bool hasNoDataDepHazard() const {
1007 return HasNoDataDepHazard;
1008 }
1009
1011 return getGeneration() < SEA_ISLANDS;
1012 }
1013
1014 bool hasInstPrefetch() const {
1015 return getGeneration() == GFX10 || getGeneration() == GFX11;
1016 }
1017
1018 bool hasPrefetch() const { return GFX12Insts; }
1019
1020 bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }
1021
1023
1024 bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
1025
1026 // Has s_cmpk_* instructions.
1027 bool hasSCmpK() const { return getGeneration() < GFX12; }
1028
1029 // Scratch is allocated in 256 dword per wave blocks for the entire
1030 // wavefront. When viewed from the perspective of an arbitrary workitem, this
1031 // is 4-byte aligned.
1032 //
1033 // Only 4-byte alignment is really needed to access anything. Transformations
1034 // on the pointer value itself may rely on the alignment / known low bits of
1035 // the pointer. Set this to something above the minimum to avoid needing
1036 // dynamic realignment in common cases.
1037 Align getStackAlignment() const { return Align(16); }
1038
1039 bool enableMachineScheduler() const override {
1040 return true;
1041 }
1042
1043 bool enableTerminalRule() const override { return true; }
1044
1045 bool useAA() const override;
1046
1047 bool enableSubRegLiveness() const override {
1048 return true;
1049 }
1050
1053
1054 // static wrappers
1055 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
1056
1057 // XXX - Why is this here if it isn't in the default pass set?
1058 bool enableEarlyIfConversion() const override {
1059 return true;
1060 }
1061
1063 const SchedRegion &Region) const override;
1064
1066 const SchedRegion &Region) const override;
1067
1068 void mirFileLoaded(MachineFunction &MF) const override;
1069
1070 unsigned getMaxNumUserSGPRs() const {
1071 return AMDGPU::getMaxNumUserSGPRs(*this);
1072 }
1073
1074 bool hasSMemRealTime() const {
1075 return HasSMemRealTime;
1076 }
1077
1078 bool hasMovrel() const {
1079 return HasMovrel;
1080 }
1081
1082 bool hasVGPRIndexMode() const {
1083 return HasVGPRIndexMode;
1084 }
1085
1086 bool useVGPRIndexMode() const;
1087
1089 return getGeneration() >= VOLCANIC_ISLANDS;
1090 }
1091
1093
1094 bool hasScalarStores() const {
1095 return HasScalarStores;
1096 }
1097
1098 bool hasScalarAtomics() const {
1099 return HasScalarAtomics;
1100 }
1101
1102 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1104
1105 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1106 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1107
1108 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1109 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1110
1111 bool hasDPP() const {
1112 return HasDPP;
1113 }
1114
1115 bool hasDPPBroadcasts() const {
1116 return HasDPP && getGeneration() < GFX10;
1117 }
1118
1120 return HasDPP && getGeneration() < GFX10;
1121 }
1122
1123 bool hasDPP8() const {
1124 return HasDPP8;
1125 }
1126
1127 bool hasDPALU_DPP() const {
1128 return HasDPALU_DPP;
1129 }
1130
1131 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1132
1133 bool hasPackedFP32Ops() const {
1134 return HasPackedFP32Ops;
1135 }
1136
1137 // Has V_PK_MOV_B32 opcode
1138 bool hasPkMovB32() const {
1139 return GFX90AInsts;
1140 }
1141
1143 return getGeneration() >= GFX10 || hasGFX940Insts();
1144 }
1145
1146 bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
1147
1148 bool hasImageInsts() const {
1149 return HasImageInsts;
1150 }
1151
1153 return HasExtendedImageInsts;
1154 }
1155
1156 bool hasR128A16() const {
1157 return HasR128A16;
1158 }
1159
1160 bool hasA16() const { return HasA16; }
1161
1162 bool hasG16() const { return HasG16; }
1163
1164 bool hasOffset3fBug() const {
1165 return HasOffset3fBug;
1166 }
1167
1169
1171
1172 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1173
1175
1177
1178 bool hasNSAEncoding() const { return HasNSAEncoding; }
1179
1180 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1181
1183
1184 unsigned getNSAMaxSize(bool HasSampler = false) const {
1185 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1186 }
1187
1188 bool hasGFX10_AEncoding() const {
1189 return GFX10_AEncoding;
1190 }
1191
1192 bool hasGFX10_BEncoding() const {
1193 return GFX10_BEncoding;
1194 }
1195
1196 bool hasGFX10_3Insts() const {
1197 return GFX10_3Insts;
1198 }
1199
1200 bool hasMadF16() const;
1201
1202 bool hasMovB64() const { return GFX940Insts || GFX1250Insts; }
1203
1204 bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }
1205
1206 // Scalar and global loads support scale_offset bit.
1207 bool hasScaleOffset() const { return GFX1250Insts; }
1208
1209 bool hasFlatGVSMode() const { return FlatGVSMode; }
1210
1211 // FLAT GLOBAL VOffset is signed
1212 bool hasSignedGVSOffset() const { return GFX1250Insts; }
1213
1214 bool enableSIScheduler() const {
1215 return EnableSIScheduler;
1216 }
1217
1218 bool loadStoreOptEnabled() const {
1219 return EnableLoadStoreOpt;
1220 }
1221
1222 bool hasSGPRInitBug() const {
1223 return SGPRInitBug;
1224 }
1225
1227 return UserSGPRInit16Bug && isWave32();
1228 }
1229
1231
1235
1238 }
1239
1243
1244 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1246 return CIInsts;
1247 }
1248
1251 }
1252
1257
1260 }
1261
1264 }
1265
1268 }
1269
1272 }
1273
1276 }
1277
1278 bool hasLDSMisalignedBug() const {
1279 return LDSMisalignedBug && !EnableCuMode;
1280 }
1281
1283 return HasInstFwdPrefetchBug;
1284 }
1285
1287 return HasVcmpxExecWARHazard;
1288 }
1289
1292 }
1293
1294 // Shift amount of a 64-bit shift cannot be the highest allocated register
1295 // if it is also at the end of the allocation block.
1297 return GFX90AInsts && !GFX940Insts;
1298 }
1299
1300 // Has one cycle hazard on transcendental instruction feeding a
1301 // non transcendental VALU.
1302 bool hasTransForwardingHazard() const { return GFX940Insts; }
1303
1304 // Has one cycle hazard on a VALU instruction partially writing dst with
1305 // a shift of result bits feeding another VALU instruction.
1307
1308 // Cannot use op_sel with v_dot instructions.
1309 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1310
1311 // Does not have HW interlocks for VALU writing and then reading SGPRs.
1312 bool hasVDecCoExecHazard() const {
1313 return GFX940Insts;
1314 }
1315
1316 bool hasNSAtoVMEMBug() const {
1317 return HasNSAtoVMEMBug;
1318 }
1319
1320 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1321
1322 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1323
1324 bool hasGFX90AInsts() const { return GFX90AInsts; }
1325
1327 return getGeneration() == GFX10;
1328 }
1329
1330 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1331
1332 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1333
1334 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1335
1337 return getGeneration() == GFX11;
1338 }
1339
1341
1343
1344 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1345
1347
1351
1352 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1353
1354 bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
1355
1357 return GFX1250Insts && getGeneration() == GFX12;
1358 }
1359
1360 /// Return if operations acting on VGPR tuples require even alignment.
1361 bool needsAlignedVGPRs() const { return RequiresAlignVGPR; }
1362
1363 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1364 bool hasSPackHL() const { return GFX11Insts; }
1365
1366 /// Return true if the target's EXP instruction has the COMPR flag, which
1367 /// affects the meaning of the EN (enable) bits.
1368 bool hasCompressedExport() const { return !GFX11Insts; }
1369
1370 /// Return true if the target's EXP instruction supports the NULL export
1371 /// target.
1372 bool hasNullExportTarget() const { return !GFX11Insts; }
1373
1374 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1375
1376 bool hasVOPDInsts() const { return HasVOPDInsts; }
1377
1379
1380 /// Return true if the target has the S_DELAY_ALU instruction.
1381 bool hasDelayAlu() const { return GFX11Insts; }
1382
1383 bool hasPackedTID() const { return HasPackedTID; }
1384
1385 // GFX94* is a derivative of GFX90A. hasGFX940Insts() being true implies that
1386 // hasGFX90AInsts() is also true.
1387 bool hasGFX940Insts() const { return GFX940Insts; }
1388
1389 // GFX950 is a derivative of GFX94*. hasGFX950Insts() implies that
1390 // hasGFX940Insts() and hasGFX90AInsts() are also true.
1391 bool hasGFX950Insts() const { return GFX950Insts; }
1392
1393 /// Returns true if the target supports
1394 /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
1395 /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
1396 bool hasLDSLoadB96_B128() const {
1397 return hasGFX950Insts();
1398 }
1399
1400 bool hasVMemToLDSLoad() const { return HasVMemToLDSLoad; }
1401
1402 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1403
1405
1407
1409
1411
1412 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1413 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1414 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1415
1416 /// \returns true if inline constants are not supported for F16 pseudo
1417 /// scalar transcendentals.
1419 return getGeneration() == GFX12;
1420 }
1421
1422 /// \returns true if the target has instructions with xf32 format support.
1423 bool hasXF32Insts() const { return HasXF32Insts; }
1424
1425 bool hasBitOp3Insts() const { return HasBitOp3Insts; }
1426
1427 bool hasPermlane16Swap() const { return HasPermlane16Swap; }
1428 bool hasPermlane32Swap() const { return HasPermlane32Swap; }
1429 bool hasAshrPkInsts() const { return HasAshrPkInsts; }
1430
1433 }
1434
1437 }
1438
1439 bool hasMin3Max3PKF16() const { return HasMin3Max3PKF16; }
1440
1441 bool hasTanhInsts() const { return HasTanhInsts; }
1442
1444
1445 bool hasAddPC64Inst() const { return GFX1250Insts; }
1446
1448
1451 }
1452
1454
1455 /// \returns true if the target has s_wait_xcnt insertion. Supported for
1456 /// GFX1250.
1457 bool hasWaitXCnt() const { return HasWaitXcnt; }
1458
1459 // A single-DWORD instruction can use a 64-bit literal.
1460 bool has64BitLiterals() const { return Has64BitLiterals; }
1461
1463
1465
1466 /// \returns The maximum number of instructions that can be enclosed in an
1467 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1468 /// instruction.
1469 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1470
1471 bool hasPrngInst() const { return HasPrngInst; }
1472
1474
1475 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1476 /// SGPRs
1477 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1478
1479 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1480 /// VGPRs
1481 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,
1482 unsigned DynamicVGPRBlockSize) const;
1483
1484 /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
1485 /// be achieved when the only function running on a CU is \p F, each workgroup
1486 /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
1487 /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a
1488 /// range, so this returns a range as well.
1489 ///
1490 /// Note that occupancy can be affected by the scratch allocation as well, but
1491 /// we do not have enough information to compute it.
1492 std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
1493 unsigned LDSSize = 0,
1494 unsigned NumSGPRs = 0,
1495 unsigned NumVGPRs = 0) const;
1496
1497 /// \returns true if the flat_scratch register should be initialized with the
1498 /// pointer to the wave's scratch memory rather than a size and offset.
1501 }
1502
1503 /// \returns true if the flat_scratch register is initialized by the HW.
1504 /// In this case it is readonly.
1506
1507 /// \returns true if the architected SGPRs are enabled.
1509
1510 /// \returns true if Global Data Share is supported.
1511 bool hasGDS() const { return HasGDS; }
1512
1513 /// \returns true if Global Wave Sync is supported.
1514 bool hasGWS() const { return HasGWS; }
1515
1516 /// \returns true if the machine has merged shaders in which s0-s7 are
1517 /// reserved by the hardware and user SGPRs start at s8 (GFX9 and later).
1518 bool hasMergedShaders() const {
1519 return getGeneration() >= GFX9;
1520 }
1521
1522 /// \returns true if the target supports the pre-NGG legacy geometry path.
1523 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1524
1525 /// \returns true if preloading kernel arguments is supported.
1526 bool hasKernargPreload() const { return KernargPreload; }
1527
1528 /// \returns true if the target has the split barriers feature.
1529 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1530
1531 /// \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1532 bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
1533
1534 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1535 // no-return form.
1537
1538 /// \returns true if the target has DX10_CLAMP kernel descriptor mode bit.
1539 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1540
1541 /// \returns true if the target has IEEE kernel descriptor mode bit.
1542 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1543
1544 // \returns true if the target has IEEE fminimum/fmaximum instructions
1546
1547 /// \returns true if the target has WG_RR_MODE kernel descriptor mode bit.
1548 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1549
1550 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1551 /// values.
1552 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1553
1554 bool hasGFX1250Insts() const { return GFX1250Insts; }
1555
/// \returns the GFX1250Insts flag; presumably VOPD3 is available exactly when
/// the GFX1250 instruction set is (TODO confirm).
1556 bool hasVOPD3() const { return GFX1250Insts; }
1557
1558 /// \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.
1559 bool hasAddSubU64Insts() const { return HasAddSubU64Insts; }
1560
1561 /// \returns true if the target has V_MAD_U32 instruction.
1562 bool hasMadU32Inst() const { return HasMadU32Inst; }
1563
1564 /// \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
1565 bool hasVectorMulU64() const { return GFX1250Insts; }
1566
1567 /// \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32
1568 /// instructions.
1569 bool hasMadU64U32NoCarry() const { return GFX1250Insts; }
1570
1571 /// \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
1572 bool hasIntMinMax64() const { return GFX1250Insts; }
1573
1574 /// \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
1575 bool hasAddMinMaxInsts() const { return HasAddMinMaxInsts; }
1576
1577 // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
1579
1580 /// \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
1581 bool hasPkMinMax3Insts() const { return GFX1250Insts; }
1582
1583 /// \returns true if target has S_GET_SHADER_CYCLES_U64 instruction.
1584 bool hasSGetShaderCyclesInst() const { return GFX1250Insts; }
1585
1586 // \returns true if target has S_SETPRIO_INC_WG instruction.
1588
1589 /// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1590 /// of sign-extending. Note that GFX1250 has not only fixed the bug but also
1591 /// extended VA to 57 bits.
1592 bool hasGetPCZeroExtension() const { return GFX12Insts && !GFX1250Insts; }
1593
1594 // \returns true if the target needs to create a prolog for backward
1595 // compatibility when preloading kernel arguments.
1597 return hasKernargPreload() && !GFX1250Insts;
1598 }
1599
1600 /// \returns SGPR allocation granularity supported by the subtarget.
1601 unsigned getSGPRAllocGranule() const {
1603 }
1604
1605 /// \returns SGPR encoding granularity supported by the subtarget.
1606 unsigned getSGPREncodingGranule() const {
1608 }
1609
1610 /// \returns Total number of SGPRs supported by the subtarget.
1611 unsigned getTotalNumSGPRs() const {
1613 }
1614
1615 /// \returns Addressable number of SGPRs supported by the subtarget.
1616 unsigned getAddressableNumSGPRs() const {
1618 }
1619
1620 /// \returns Minimum number of SGPRs that meets the given number of waves per
1621 /// execution unit requirement supported by the subtarget.
1622 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1623 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1624 }
1625
1626 /// \returns Maximum number of SGPRs that meets the given number of waves per
1627 /// execution unit requirement supported by the subtarget.
1628 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1629 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1630 }
1631
1632 /// \returns Reserved number of SGPRs. This is the common
1633 /// utility function called by the MachineFunction and
1634 /// Function variants of getReservedNumSGPRs.
1635 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1636 /// \returns Reserved number of SGPRs for given machine function \p MF.
1637 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1638
1639 /// \returns Reserved number of SGPRs for given function \p F.
1640 unsigned getReservedNumSGPRs(const Function &F) const;
1641
1642 /// \returns Maximum number of preloaded SGPRs for the subtarget.
1643 unsigned getMaxNumPreloadedSGPRs() const;
1644
1645 /// \returns max num SGPRs. This is the common utility
1646 /// function called by MachineFunction and Function
1647 /// variants of getMaxNumSGPRs.
1648 unsigned getBaseMaxNumSGPRs(const Function &F,
1649 std::pair<unsigned, unsigned> WavesPerEU,
1650 unsigned PreloadedSGPRs,
1651 unsigned ReservedNumSGPRs) const;
1652
1653 /// \returns Maximum number of SGPRs that meets number of waves per execution
1654 /// unit requirement for function \p MF, or number of SGPRs explicitly
1655 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1656 ///
1657 /// \returns Value that meets number of waves per execution unit requirement
1658 /// if explicitly requested value cannot be converted to integer, violates
1659 /// subtarget's specifications, or does not meet number of waves per execution
1660 /// unit requirement.
1661 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1662
1663 /// \returns Maximum number of SGPRs that meets number of waves per execution
1664 /// unit requirement for function \p F, or number of SGPRs explicitly
1665 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1666 ///
1667 /// \returns Value that meets number of waves per execution unit requirement
1668 /// if explicitly requested value cannot be converted to integer, violates
1669 /// subtarget's specifications, or does not meet number of waves per execution
1670 /// unit requirement.
1671 unsigned getMaxNumSGPRs(const Function &F) const;
1672
1673 /// \returns VGPR allocation granularity supported by the subtarget.
1674 unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {
1675 return AMDGPU::IsaInfo::getVGPRAllocGranule(this, DynamicVGPRBlockSize);
1676 }
1677
1678 /// \returns VGPR encoding granularity supported by the subtarget.
1679 unsigned getVGPREncodingGranule() const {
1681 }
1682
1683 /// \returns Total number of VGPRs supported by the subtarget.
1684 unsigned getTotalNumVGPRs() const {
1686 }
1687
1688 /// \returns Addressable number of architectural VGPRs supported by the
1689 /// subtarget.
1693
1694 /// \returns Addressable number of VGPRs supported by the subtarget.
1695 unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
1696 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
1697 }
1698
1699 /// \returns the minimum number of VGPRs that will prevent achieving more than
1700 /// the specified number of waves \p WavesPerEU.
1701 unsigned getMinNumVGPRs(unsigned WavesPerEU,
1702 unsigned DynamicVGPRBlockSize) const {
1703 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU,
1704 DynamicVGPRBlockSize);
1705 }
1706
1707 /// \returns the maximum number of VGPRs that can be used and still achieve
1708 /// at least the specified number of waves \p WavesPerEU.
1709 unsigned getMaxNumVGPRs(unsigned WavesPerEU,
1710 unsigned DynamicVGPRBlockSize) const {
1711 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU,
1712 DynamicVGPRBlockSize);
1713 }
1714
1715 /// \returns max num VGPRs. This is the common utility function
1716 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1717 unsigned
1719 std::pair<unsigned, unsigned> NumVGPRBounds) const;
1720
1721 /// \returns Maximum number of VGPRs that meets number of waves per execution
1722 /// unit requirement for function \p F, or number of VGPRs explicitly
1723 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1724 ///
1725 /// \returns Value that meets number of waves per execution unit requirement
1726 /// if explicitly requested value cannot be converted to integer, violates
1727 /// subtarget's specifications, or does not meet number of waves per execution
1728 /// unit requirement.
1729 unsigned getMaxNumVGPRs(const Function &F) const;
1730
/// \returns Maximum number of AGPRs for function \p F. This forwards to
/// getMaxNumVGPRs(F), so the result is the same value as the VGPR limit.
1731 unsigned getMaxNumAGPRs(const Function &F) const {
1732 return getMaxNumVGPRs(F);
1733 }
1734
1735 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
1736 /// of waves per execution unit required for the function \p F.
1737 std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
1738
1739 /// \returns Maximum number of VGPRs that meets number of waves per execution
1740 /// unit requirement for function \p MF, or number of VGPRs explicitly
1741 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1742 ///
1743 /// \returns Value that meets number of waves per execution unit requirement
1744 /// if explicitly requested value cannot be converted to integer, violates
1745 /// subtarget's specifications, or does not meet number of waves per execution
1746 /// unit requirement.
1747 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1748
/// \returns true if wave32 is supported, i.e. the generation is GFX10 or later.
1749 bool supportsWave32() const { return getGeneration() >= GFX10; }
1750
/// \returns true if wave64 is supported, i.e. hasGFX1250Insts() is false.
1751 bool supportsWave64() const { return !hasGFX1250Insts(); }
1752
/// \returns true if the wavefront size of this subtarget is 32.
1753 bool isWave32() const {
1754 return getWavefrontSize() == 32;
1755 }
1756
/// \returns true if the wavefront size of this subtarget is 64.
1757 bool isWave64() const {
1758 return getWavefrontSize() == 64;
1759 }
1760
1761 /// \returns true if the wavesize of this subtarget is reliably known. This is
1762 /// false only for a default target-cpu that does not have an explicit
1763 /// +wavefrontsize target feature.
1764 bool isWaveSizeKnown() const {
1765 return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
1766 hasFeature(AMDGPU::FeatureWavefrontSize64);
1767 }
1768
1770 return getRegisterInfo()->getBoolRC();
1771 }
1772
1773 /// \returns Maximum number of work groups per compute unit supported by the
1774 /// subtarget and limited by given \p FlatWorkGroupSize.
1775 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1776 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1777 }
1778
1779 /// \returns Minimum flat work group size supported by the subtarget.
1780 unsigned getMinFlatWorkGroupSize() const override {
1782 }
1783
1784 /// \returns Maximum flat work group size supported by the subtarget.
1785 unsigned getMaxFlatWorkGroupSize() const override {
1787 }
1788
1789 /// \returns Number of waves per execution unit required to support the given
1790 /// \p FlatWorkGroupSize.
1791 unsigned
1792 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1793 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1794 }
1795
1796 /// \returns Minimum number of waves per execution unit supported by the
1797 /// subtarget.
1798 unsigned getMinWavesPerEU() const override {
1800 }
1801
1802 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1803 SDep &Dep,
1804 const TargetSchedModel *SchedModel) const override;
1805
1806 /// \returns true if it's beneficial on this subtarget for the scheduler to
1807 /// cluster stores as well as loads.
1808 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1809
1810 /// \returns the number of address arguments from which to enable MIMG NSA
1811 /// on supported architectures.
1812 unsigned getNSAThreshold(const MachineFunction &MF) const;
1813
1814 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1815 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1817
1818 // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
1819 // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
1821
/// \returns true if the DynamicVGPR flag is set for this subtarget.
1822 bool isDynamicVGPREnabled() const { return DynamicVGPR; }
/// \returns the dynamic VGPR block size: 32 if DynamicVGPRBlockSize32 is set,
/// otherwise 16.
1823 unsigned getDynamicVGPRBlockSize() const {
1824 return DynamicVGPRBlockSize32 ? 32 : 16;
1825 }
1826
1828 // AMDGPU doesn't care if early-clobber and undef operands are allocated
1829 // to the same register.
1830 return false;
1831 }
1832
1833 // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
1834 // and must be surrounded by S_WAIT_ALU(0xFFE3).
1836 return getGeneration() == GFX12;
1837 }
1838
1839 // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
1840 // read.
1842 return GFX1250Insts && getGeneration() == GFX12;
1843 }
1844
1845 /// \returns true if the subtarget supports clusters of workgroups.
1846 bool hasClusters() const { return HasClusters; }
1847
1848 /// \returns true if the subtarget requires a wait for xcnt before atomic
1849 /// flat/global stores & rmw.
1851
1852 /// \returns the number of significant bits in the immediate field of the
1853 /// S_NOP instruction.
1854 unsigned getSNopBits() const {
1856 return 7;
1858 return 4;
1859 return 3;
1860 }
1861
1862 /// \returns true if the sub-target supports buffer resource (V#) with 45-bit
1863 /// num_records.
1867
1871};
1872
1874public:
1875 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1876
1877 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1878
1879 bool hasDispatchPtr() const { return DispatchPtr; }
1880
1881 bool hasQueuePtr() const { return QueuePtr; }
1882
1883 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1884
1885 bool hasDispatchID() const { return DispatchID; }
1886
1887 bool hasFlatScratchInit() const { return FlatScratchInit; }
1888
1889 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1890
1891 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1892
1893 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1894
1895 unsigned getNumFreeUserSGPRs();
1896
1897 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1898
1909
1910 // Returns the size in number of SGPRs for preload user SGPR field.
1912 switch (ID) {
1914 return 2;
1916 return 4;
1917 case DispatchPtrID:
1918 return 2;
1919 case QueuePtrID:
1920 return 2;
1922 return 2;
1923 case DispatchIdID:
1924 return 2;
1925 case FlatScratchInitID:
1926 return 2;
1928 return 1;
1929 }
1930 llvm_unreachable("Unknown UserSGPRID.");
1931 }
1932
1933 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1934
1935private:
1936 const GCNSubtarget &ST;
1937
1938 // Private memory buffer
1939 // Compute directly in sgpr[0:1]
1940 // Other shaders indirect 64-bits at sgpr[0:1]
1941 bool ImplicitBufferPtr = false;
1942
1943 bool PrivateSegmentBuffer = false;
1944
1945 bool DispatchPtr = false;
1946
1947 bool QueuePtr = false;
1948
1949 bool KernargSegmentPtr = false;
1950
1951 bool DispatchID = false;
1952
1953 bool FlatScratchInit = false;
1954
1955 bool PrivateSegmentSize = false;
1956
1957 unsigned NumKernargPreloadSGPRs = 0;
1958
1959 unsigned NumUsedUserSGPRs = 0;
1960};
1961
1962} // end namespace llvm
1963
1964#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
#define F(x, y, z)
Definition MD5.cpp:54
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
bool hasMemoryAtomicFaddF32DenormalSupport() const
bool hasFlat() const
bool hasD16Images() const
bool hasMinimum3Maximum3F32() const
InstrItineraryData InstrItins
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
bool hasSDWAOmod() const
bool hasFlatGVSMode() const
bool hasPermlane32Swap() const
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
bool hasSwap() const
bool hasPkFmacF16Inst() const
bool HasAtomicFMinFMaxF64FlatInsts
bool hasPkMinMax3Insts() const
bool hasDot2Insts() const
bool hasD16LoadStore() const
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
bool hasScalarCompareEq64() const
bool has1_5xVGPRs() const
int getLDSBankCount() const
bool hasSafeCUPrefetch() const
bool hasOnlyRevVALUShifts() const
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
bool hasLdsAtomicAddF64() const
bool hasFlatLgkmVMemCountInOrder() const
bool Has45BitNumRecordsBufferResource
bool flatScratchIsPointer() const
bool hasSDWAMac() const
bool hasFP8ConversionInsts() const
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
bool hasApertureRegs() const
unsigned MaxPrivateElementSize
bool unsafeDSOffsetFoldingEnabled() const
bool hasBitOp3Insts() const
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
bool vmemWriteNeedsExpWaitcnt() const
bool hasAtomicFMinFMaxF32FlatInsts() const
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
bool hasIEEEMinimumMaximumInsts() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
bool hasGetWaveIdInst() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
bool hasRelaxedBufferOOBMode() const
bool hasPkAddMinMaxInsts() const
bool hasDLInsts() const
bool hasExtendedImageInsts() const
bool hasVmemWriteVgprInOrder() const
bool hasBCNT(unsigned Size) const
unsigned getSNopBits() const
bool hasMAIInsts() const
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
bool has1024AddressableVGPRs() const
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
bool hasFlatScratchInsts() const
bool hasMultiDwordFlatScratchAddressing() const
bool hasArchitectedSGPRs() const
bool hasFmaakFmamkF64Insts() const
bool hasTanhInsts() const
bool hasHWFP64() const
bool hasScaleOffset() const
bool hasDenormModeInst() const
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
bool hasCvtScaleForwardingHazard() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
bool hasUnalignedDSAccessEnabled() const
bool hasTensorCvtLutInsts() const
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasDot1Insts() const
bool hasDot3Insts() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
bool hasAutoWaitcntBeforeBarrier() const
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
unsigned getTotalNumSGPRs() const
bool hasGFX1250Insts() const
const InstrItineraryData * getInstrItineraryData() const override
bool hasSafeSmemPrefetch() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool HasShaderCyclesHiLoRegisters
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
bool privateMemoryResourceIsRangeChecked() const
bool hasScalarSubwordLoads() const
bool hasDot11Insts() const
bool enableFlatScratch() const
bool hasMadF16() const
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const
bool hasMin3Max3PKF16() const
bool hasUnalignedBufferAccess() const
bool hasR128A16() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasPrngInst() const
bool hasSignedScratchOffsets() const
bool hasGlobalAddTidInsts() const
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
bool hasFmaMixBF16Insts() const
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool hasVMemToLDSLoad() const
bool supportsGetDoorbellID() const
bool supportsWave32() const
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
bool hasFlatAtomicFaddF32Inst() const
bool hasKernargPreload() const
bool hasFP8Insts() const
unsigned getMaxNumAGPRs(const Function &F) const
bool hasReadM0MovRelInterpHazard() const
bool isDynamicVGPREnabled() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
bool hasFmaakFmamkF32Insts() const
bool hasClusters() const
bool hasVscnt() const
bool hasMad64_32() const
InstructionSelector * getInstructionSelector() const override
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
bool hasHardClauses() const
bool useDS128() const
bool hasExtendedWaitCounts() const
bool hasBVHDualAndBVH8Insts() const
bool hasMinimum3Maximum3PKF16() const
bool hasLshlAddU64Inst() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
bool hasFmacF64Inst() const
bool RequiresWaitsBeforeSystemScopeStores
bool hasXF32Insts() const
bool hasInstPrefetch() const
bool hasAddPC64Inst() const
unsigned maxHardClauseLength() const
bool hasAshrPkInsts() const
bool isMesaGfxShader(const Function &F) const
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
bool hasDPP() const
bool hasVINTERPEncoding() const
bool hasGloballyAddressableScratch() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasAddSubU64Insts() const
bool hasLegacyGeometry() const
bool has64BitLiterals() const
TrapHandlerAbi getTrapHandlerAbi() const
bool isCuModeEnabled() const
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
bool hasUnalignedScratchAccess() const
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasMinimum3Maximum3F16() const
bool hasSDWAOutModsVOPC() const
bool hasAtomicFMinFMaxF32GlobalInsts() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
bool hasLdsBarrierArriveAtomic() const
bool hasGFX950Insts() const
bool has45BitNumRecordsBufferResource() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
unsigned getMaxNumPreloadedSGPRs() const
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
bool hasNoDataDepHazard() const
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool enableTerminalRule() const override
bool useVGPRBlockOpsForCSR() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool hasUnalignedDSAccess() const
bool hasAddMinMaxInsts() const
bool needsKernArgPreloadProlog() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
bool hasIntClamp() const
bool hasGFX10_AEncoding() const
bool hasFP8E5M3Insts() const
bool hasFlatSegmentOffsetBug() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const
bool hasEmulatedSystemScopeAtomics() const
bool hasMadU64U32NoCarry() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
const SITargetLowering * getTargetLowering() const override
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
bool hasGFX940Insts() const
bool hasFullRate64Ops() const
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
bool enableMachineScheduler() const override
bool hasLDSFPAtomicAddF64() const
bool hasFlatGlobalInsts() const
bool HasGloballyAddressableScratch
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
bool getScalarizeGlobalBehavior() const
bool HasAtomicFMinFMaxF32FlatInsts
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasScratchBaseForwardingHazard() const
bool hasIntMinMax64() const
bool hasShaderCyclesHiLoRegisters() const
bool hasSDWASdst() const
bool HasDefaultComponentBroadcast
bool hasScalarPackInsts() const
bool hasFFBL() const
bool hasNSAEncoding() const
bool requiresDisjointEarlyClobberAndUndef() const override
bool hasVALUReadSGPRHazard() const
bool hasSMemRealTime() const
bool hasFlatAddressSpace() const
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
bool hasMovB64() const
bool hasVmemPrefInsts() const
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
bool hasMed3_16() const
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasUnalignedScratchAccessEnabled() const
bool hasMovrel() const
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
bool hasBFI() const
bool hasDot13Insts() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
bool HasAtomicBufferGlobalPkAddF16Insts
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
bool hasUnalignedBufferAccessEnabled() const
bool isWaveSizeKnown() const
Returns if the wavesize of this subtarget is known reliable.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool hasFMA() const
bool hasDot10Insts() const
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
bool supportsWave64() const
bool HasAtomicBufferPkAddBF16Inst
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
bool hasFormattedMUBUFInsts() const
bool hasFlatScratchSVSMode() const
bool supportsWGP() const
bool hasG16() const
bool hasHalfRate64Ops() const
bool hasAtomicFaddInsts() const
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
bool hasPermlane16Swap() const
bool hasNSAtoVMEMBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
bool hasMIMG_R128() const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
unsigned getMaxNumUserSGPRs() const
bool hasTransposeLoadF4F6Insts() const
bool hasMadU32Inst() const
bool hasAtomicFaddNoRtnInsts() const
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
bool HasEmulatedSystemScopeAtomics
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasSignedGVSOffset() const
bool requiresWaitXCntBeforeAtomicStores() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
bool hasVectorMulU64() const
bool hasScalarMulHiInsts() const
bool hasSCmpK() const
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
bool requiresWaitIdleBeforeGetReg() const
bool hasPointSampleAccel() const
bool hasDot12Insts() const
bool hasDS96AndDS128() const
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Generation getGeneration() const
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
bool hasAtomicBufferGlobalPkAddF16Insts() const
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit ...
bool hasNoSdstCMPX() const
bool isXNACKEnabled() const
bool hasScalarAddSub64() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
bool enableEarlyIfConversion() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by an SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
bool hasSGetShaderCyclesInst() const
bool hasRFEHazards() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
bool hasFlatScratchSTMode() const
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
bool hasAddr64() const
bool HasAtomicGlobalPkAddBF16Inst
bool hasUnalignedAccessMode() const
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool isWave64() const
unsigned getDynamicVGPRBlockSize() const
bool hasFmaMixInsts() const
bool hasCARRY() const
bool hasPackedTID() const
bool setRegModeNeedsVNOPs() const
bool hasFP64() const
bool hasAddNoCarry() const
bool requiresWaitsBeforeSystemScopeStores() const
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
bool hasSALUFloatInsts() const
bool EnableUnsafeDSOffsetFolding
bool hasFractBug() const
bool isPreciseMemoryEnabled() const
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
bool HasMemoryAtomicFaddF32DenormalSupport
bool hasMTBUFInsts() const
bool hasDot4Insts() const
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
bool hasWaitXCnt() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
bool hasSetPrioIncWgInst() const
~GCNSubtarget() override
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasDot9Insts() const
bool hasVOPD3() const
bool hasAtomicCSub() const
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
bool hasDefaultComponentBroadcast() const
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
bool hasBFE() const
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
bool HasFlatBufferGlobalAtomicFaddF64Inst
static unsigned getNumUserSGPRForField(UserSGPRID ID)
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
unsigned getNumKernargPreloadSGPRs() const
unsigned getNumUsedUserSGPRs() const
GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST)
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition ScheduleDAG.h:51
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition bit.h:236
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
A region of an MBB for scheduling.