LLVM 22.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI selector. The values are documented in the
/// trap-handler-abi section of https://llvm.org/docs/AMDGPUUsage.html.
enum class TrapHandlerAbi { NONE = 0x00, AMDHSA = 0x01 };
44
45 enum class TrapID {
48 };
49
50private:
51 /// SelectionDAGISel related APIs.
52 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
53
54 /// GlobalISel related APIs.
55 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
56 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
57 std::unique_ptr<InstructionSelector> InstSelector;
58 std::unique_ptr<LegalizerInfo> Legalizer;
59 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
60
61protected:
62 // Basic subtarget description.
64 unsigned Gen = INVALID;
66 int LDSBankCount = 0;
68
69 // Possibly statically set by tablegen, but may want to be overridden.
70 bool FastDenormalF32 = false;
71 bool HalfRate64Ops = false;
72 bool FullRate64Ops = false;
73
74 // Dynamically set bits that enable features.
75 bool FlatForGlobal = false;
77 bool BackOffBarrier = false;
79 bool UnalignedAccessMode = false;
81 bool HasApertureRegs = false;
82 bool SupportsXNACK = false;
83 bool KernargPreload = false;
84
85 // This should not be used directly. 'TargetID' tracks the dynamic settings
86 // for XNACK.
87 bool EnableXNACK = false;
88
89 bool EnableTgSplit = false;
90 bool EnableCuMode = false;
91 bool TrapHandler = false;
92 bool EnablePreciseMemory = false;
93
94 // Used as options.
95 bool EnableLoadStoreOpt = false;
97 bool EnableSIScheduler = false;
98 bool EnableDS128 = false;
99 bool EnablePRTStrictNull = false;
100 bool DumpCode = false;
102
103 // Subtarget properties statically set by tablegen.
104 bool FP64 = false;
105 bool FMA = false;
106 bool MIMG_R128 = false;
107 bool CIInsts = false;
108 bool GFX8Insts = false;
109 bool GFX9Insts = false;
110 bool GFX90AInsts = false;
111 bool GFX940Insts = false;
112 bool GFX950Insts = false;
113 bool GFX10Insts = false;
114 bool GFX11Insts = false;
115 bool GFX12Insts = false;
116 bool GFX1250Insts = false;
117 bool GFX10_3Insts = false;
118 bool GFX7GFX8GFX9Insts = false;
119 bool SGPRInitBug = false;
120 bool UserSGPRInit16Bug = false;
123 bool HasSMemRealTime = false;
124 bool HasIntClamp = false;
125 bool HasFmaMixInsts = false;
126 bool HasFmaMixBF16Insts = false;
127 bool HasMovrel = false;
128 bool HasVGPRIndexMode = false;
130 bool HasScalarStores = false;
131 bool HasScalarAtomics = false;
132 bool HasSDWAOmod = false;
133 bool HasSDWAScalar = false;
134 bool HasSDWASdst = false;
135 bool HasSDWAMac = false;
136 bool HasSDWAOutModsVOPC = false;
137 bool HasDPP = false;
138 bool HasDPP8 = false;
139 bool HasDPALU_DPP = false;
140 bool HasDPPSrc1SGPR = false;
141 bool HasPackedFP32Ops = false;
142 bool HasImageInsts = false;
144 bool HasR128A16 = false;
145 bool HasA16 = false;
146 bool HasG16 = false;
147 bool HasNSAEncoding = false;
149 bool GFX10_AEncoding = false;
150 bool GFX10_BEncoding = false;
151 bool HasDLInsts = false;
152 bool HasFmacF64Inst = false;
153 bool HasDot1Insts = false;
154 bool HasDot2Insts = false;
155 bool HasDot3Insts = false;
156 bool HasDot4Insts = false;
157 bool HasDot5Insts = false;
158 bool HasDot6Insts = false;
159 bool HasDot7Insts = false;
160 bool HasDot8Insts = false;
161 bool HasDot9Insts = false;
162 bool HasDot10Insts = false;
163 bool HasDot11Insts = false;
164 bool HasDot12Insts = false;
165 bool HasDot13Insts = false;
166 bool HasMAIInsts = false;
167 bool HasFP8Insts = false;
169 bool HasCubeInsts = false;
170 bool HasLerpInst = false;
171 bool HasSadInsts = false;
172 bool HasQsadInsts = false;
173 bool HasCvtNormInsts = false;
176 bool HasFP8E5M3Insts = false;
177 bool HasCvtFP8Vop1Bug = false;
178 bool HasPkFmacF16Inst = false;
199 bool HasXF32Insts = false;
200 /// The maximum number of instructions that may be placed within an S_CLAUSE,
201 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
202 /// indicates a lack of S_CLAUSE support.
204 bool SupportsSRAMECC = false;
205 bool DynamicVGPR = false;
207 bool HasVMemToLDSLoad = false;
208 bool RequiresAlignVGPR = false;
209
210 // This should not be used directly. 'TargetID' tracks the dynamic settings
211 // for SRAMECC.
212 bool EnableSRAMECC = false;
213
214 bool HasNoSdstCMPX = false;
215 bool HasVscnt = false;
216 bool HasWaitXcnt = false;
217 bool HasGetWaveIdInst = false;
218 bool HasSMemTimeInst = false;
221 bool HasVOP3Literal = false;
222 bool HasNoDataDepHazard = false;
223 bool FlatAddressSpace = false;
224 bool FlatInstOffsets = false;
225 bool FlatGlobalInsts = false;
226 bool FlatScratchInsts = false;
227 bool FlatGVSMode = false;
230 bool EnableFlatScratch = false;
232 bool HasGDS = false;
233 bool HasGWS = false;
234 bool AddNoCarryInsts = false;
235 bool HasUnpackedD16VMem = false;
236 bool LDSMisalignedBug = false;
239 bool UnalignedDSAccess = false;
240 bool HasPackedTID = false;
241 bool ScalarizeGlobal = false;
242 bool HasSALUFloatInsts = false;
245 bool Has64BitLiterals = false;
247 bool HasBitOp3Insts = false;
248 bool HasTanhInsts = false;
251 bool HasPrngInst = false;
253 bool HasPermlane16Swap = false;
254 bool HasPermlane32Swap = false;
259 bool HasVmemPrefInsts = false;
261 bool HasSafeCUPrefetch = false;
264 bool HasNSAtoVMEMBug = false;
265 bool HasNSAClauseBug = false;
266 bool HasOffset3fBug = false;
272 bool Has1_5xVGPRs = false;
273 bool HasMADIntraFwdBug = false;
274 bool HasVOPDInsts = false;
278 bool HasAshrPkInsts = false;
282 bool HasMin3Max3PKF16 = false;
284 bool HasLshlAddU64Inst = false;
285 bool HasAddSubU64Insts = false;
286 bool HasMadU32Inst = false;
287 bool HasAddMinMaxInsts = false;
292 bool HasSWakeupBarrier = false;
293
294 bool RequiresCOV6 = false;
297
299
300 bool HasClusters = false;
302
303 // Dummy feature to use for assembler in tablegen.
304 bool FeatureDisable = false;
305
306private:
307 SIInstrInfo InstrInfo;
308 SITargetLowering TLInfo;
309 SIFrameLowering FrameLowering;
310
311public:
312 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
313 const GCNTargetMachine &TM);
314 ~GCNSubtarget() override;
315
317 StringRef GPU, StringRef FS);
318
319 /// Diagnose inconsistent subtarget features before attempting to codegen
320 /// function \p F.
321 void checkSubtargetFeatures(const Function &F) const;
322
323 const SIInstrInfo *getInstrInfo() const override {
324 return &InstrInfo;
325 }
326
327 const SIFrameLowering *getFrameLowering() const override {
328 return &FrameLowering;
329 }
330
331 const SITargetLowering *getTargetLowering() const override {
332 return &TLInfo;
333 }
334
335 const SIRegisterInfo *getRegisterInfo() const override {
336 return &InstrInfo.getRegisterInfo();
337 }
338
339 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
340
341 const CallLowering *getCallLowering() const override {
342 return CallLoweringInfo.get();
343 }
344
345 const InlineAsmLowering *getInlineAsmLowering() const override {
346 return InlineAsmLoweringInfo.get();
347 }
348
350 return InstSelector.get();
351 }
352
353 const LegalizerInfo *getLegalizerInfo() const override {
354 return Legalizer.get();
355 }
356
357 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
358 return RegBankInfo.get();
359 }
360
362 return TargetID;
363 }
364
366 return &InstrItins;
367 }
368
370
372 return (Generation)Gen;
373 }
374
375 unsigned getMaxWaveScratchSize() const {
376 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
377 if (getGeneration() >= GFX12) {
378 // 18-bit field in units of 64-dword.
379 return (64 * 4) * ((1 << 18) - 1);
380 }
381 if (getGeneration() == GFX11) {
382 // 15-bit field in units of 64-dword.
383 return (64 * 4) * ((1 << 15) - 1);
384 }
385 // 13-bit field in units of 256-dword.
386 return (256 * 4) * ((1 << 13) - 1);
387 }
388
389 /// Return the number of high bits known to be zero for a frame index.
393
394 int getLDSBankCount() const {
395 return LDSBankCount;
396 }
397
398 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
399 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
400 }
401
402 unsigned getConstantBusLimit(unsigned Opcode) const;
403
404 /// Returns if the result of this instruction with a 16-bit result returned in
405 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
406 /// the original value.
407 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
408
409 bool supportsWGP() const {
410 if (GFX1250Insts)
411 return false;
412 return getGeneration() >= GFX10;
413 }
414
415 bool hasIntClamp() const {
416 return HasIntClamp;
417 }
418
419 bool hasFP64() const {
420 return FP64;
421 }
422
423 bool hasMIMG_R128() const {
424 return MIMG_R128;
425 }
426
427 bool hasHWFP64() const {
428 return FP64;
429 }
430
431 bool hasHalfRate64Ops() const {
432 return HalfRate64Ops;
433 }
434
435 bool hasFullRate64Ops() const {
436 return FullRate64Ops;
437 }
438
439 bool hasAddr64() const {
441 }
442
443 bool hasFlat() const {
445 }
446
447 // Return true if the target only has the reverse operand versions of VALU
448 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
449 bool hasOnlyRevVALUShifts() const {
451 }
452
453 bool hasFractBug() const {
455 }
456
457 bool hasBFE() const {
458 return true;
459 }
460
461 bool hasBFI() const {
462 return true;
463 }
464
465 bool hasBFM() const {
466 return hasBFE();
467 }
468
469 bool hasBCNT(unsigned Size) const {
470 return true;
471 }
472
473 bool hasFFBL() const {
474 return true;
475 }
476
477 bool hasFFBH() const {
478 return true;
479 }
480
481 bool hasMed3_16() const {
483 }
484
485 bool hasMin3Max3_16() const {
487 }
488
489 bool hasFmaMixInsts() const {
490 return HasFmaMixInsts;
491 }
492
493 bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; }
494
495 bool hasCARRY() const {
496 return true;
497 }
498
499 bool hasFMA() const {
500 return FMA;
501 }
502
503 bool hasSwap() const {
504 return GFX9Insts;
505 }
506
507 bool hasScalarPackInsts() const {
508 return GFX9Insts;
509 }
510
511 bool hasScalarMulHiInsts() const {
512 return GFX9Insts;
513 }
514
515 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
516
520
522 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
523 return getGeneration() >= GFX9;
524 }
525
526 /// True if the offset field of DS instructions works as expected. On SI, the
527 /// offset uses a 16-bit adder and does not always wrap properly.
528 bool hasUsableDSOffset() const {
529 return getGeneration() >= SEA_ISLANDS;
530 }
531
535
536 /// Condition output from div_scale is usable.
540
541 /// Extra wait hazard is needed in some cases before
542 /// s_cbranch_vccnz/s_cbranch_vccz.
543 bool hasReadVCCZBug() const {
544 return getGeneration() <= SEA_ISLANDS;
545 }
546
547 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
549 return getGeneration() >= GFX10;
550 }
551
552 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
553 /// was written by a VALU instruction.
556 }
557
558 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
559 /// SGPR was written by a VALU Instruction.
562 }
563
564 bool hasRFEHazards() const {
566 }
567
568 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
569 unsigned getSetRegWaitStates() const {
570 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
571 }
572
573 bool dumpCode() const {
574 return DumpCode;
575 }
576
577 /// Return the amount of LDS that can be used that will not restrict the
578 /// occupancy lower than WaveCount.
579 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
580 const Function &) const;
581
584 }
585
586 /// \returns If target supports S_DENORM_MODE.
587 bool hasDenormModeInst() const {
589 }
590
591 bool useFlatForGlobal() const {
592 return FlatForGlobal;
593 }
594
595 /// \returns If target supports ds_read/write_b128 and user enables generation
596 /// of ds_read/write_b128.
597 bool useDS128() const {
598 return CIInsts && EnableDS128;
599 }
600
601 /// \return If target supports ds_read/write_b96/128.
602 bool hasDS96AndDS128() const {
603 return CIInsts;
604 }
605
606 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
607 bool haveRoundOpsF64() const {
608 return CIInsts;
609 }
610
611 /// \returns If MUBUF instructions always perform range checking, even for
612 /// buffer resources used for private memory access.
616
617 /// \returns If target requires PRT Struct NULL support (zero result registers
618 /// for sparse texture support).
619 bool usePRTStrictNull() const {
620 return EnablePRTStrictNull;
621 }
622
626
627 /// \returns true if the target supports backing off of s_barrier instructions
628 /// when an exception is raised.
630 return BackOffBarrier;
631 }
632
635 }
636
640
641 bool hasUnalignedDSAccess() const {
642 return UnalignedDSAccess;
643 }
644
648
651 }
652
656
658 return UnalignedAccessMode;
659 }
660
662
663 bool hasApertureRegs() const {
664 return HasApertureRegs;
665 }
666
667 bool isTrapHandlerEnabled() const {
668 return TrapHandler;
669 }
670
671 bool isXNACKEnabled() const {
672 return TargetID.isXnackOnOrAny();
673 }
674
675 bool isTgSplitEnabled() const {
676 return EnableTgSplit;
677 }
678
679 bool isCuModeEnabled() const {
680 return EnableCuMode;
681 }
682
684
685 bool hasFlatAddressSpace() const {
686 return FlatAddressSpace;
687 }
688
689 bool hasFlatScrRegister() const {
690 return hasFlatAddressSpace();
691 }
692
693 bool hasFlatInstOffsets() const {
694 return FlatInstOffsets;
695 }
696
697 bool hasFlatGlobalInsts() const {
698 return FlatGlobalInsts;
699 }
700
701 bool hasFlatScratchInsts() const {
702 return FlatScratchInsts;
703 }
704
705 // Check if target supports ST addressing mode with FLAT scratch instructions.
706 // The ST addressing mode means no registers are used, either VGPR or SGPR,
707 // but only immediate offset is swizzled and added to the FLAT scratch base.
708 bool hasFlatScratchSTMode() const {
710 }
711
712 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
713
716 }
717
718 bool enableFlatScratch() const {
719 return flatScratchIsArchitected() ||
721 }
722
723 bool hasGlobalAddTidInsts() const {
724 return GFX10_BEncoding;
725 }
726
727 bool hasAtomicCSub() const {
728 return GFX10_BEncoding;
729 }
730
731 bool hasMTBUFInsts() const { return !hasGFX1250Insts(); }
732
733 bool hasFormattedMUBUFInsts() const { return !hasGFX1250Insts(); }
734
735 bool hasExportInsts() const {
736 return !hasGFX940Insts() && !hasGFX1250Insts();
737 }
738
739 bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
740
741 // DS_ADD_F64/DS_ADD_RTN_F64
742 bool hasLdsAtomicAddF64() const {
743 return hasGFX90AInsts() || hasGFX1250Insts();
744 }
745
747 return getGeneration() >= GFX9;
748 }
749
752 }
753
755 return getGeneration() > GFX9;
756 }
757
758 bool hasD16LoadStore() const {
759 return getGeneration() >= GFX9;
760 }
761
763 return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
764 }
765
766 bool hasD16Images() const {
768 }
769
770 /// Return if most LDS instructions have an m0 use that require m0 to be
771 /// initialized.
772 bool ldsRequiresM0Init() const {
773 return getGeneration() < GFX9;
774 }
775
776 // True if the hardware rewinds and replays GWS operations if a wave is
777 // preempted.
778 //
779 // If this is false, a GWS operation requires testing if a nack set the
780 // MEM_VIOL bit, and repeating if so.
781 bool hasGWSAutoReplay() const {
782 return getGeneration() >= GFX9;
783 }
784
785 /// \returns if target has ds_gws_sema_release_all instruction.
786 bool hasGWSSemaReleaseAll() const {
787 return CIInsts;
788 }
789
790 /// \returns true if the target has integer add/sub instructions that do not
791 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
792 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
793 /// for saturation.
794 bool hasAddNoCarry() const {
795 return AddNoCarryInsts;
796 }
797
798 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
799
800 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
801
802 bool hasUnpackedD16VMem() const {
803 return HasUnpackedD16VMem;
804 }
805
806 // Covers VS/PS/CS graphics shaders
807 bool isMesaGfxShader(const Function &F) const {
808 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
809 }
810
811 bool hasMad64_32() const {
812 return getGeneration() >= SEA_ISLANDS;
813 }
814
815 bool hasSDWAOmod() const {
816 return HasSDWAOmod;
817 }
818
819 bool hasSDWAScalar() const {
820 return HasSDWAScalar;
821 }
822
823 bool hasSDWASdst() const {
824 return HasSDWASdst;
825 }
826
827 bool hasSDWAMac() const {
828 return HasSDWAMac;
829 }
830
831 bool hasSDWAOutModsVOPC() const {
832 return HasSDWAOutModsVOPC;
833 }
834
835 bool hasDLInsts() const {
836 return HasDLInsts;
837 }
838
839 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
840
841 bool hasDot1Insts() const {
842 return HasDot1Insts;
843 }
844
845 bool hasDot2Insts() const {
846 return HasDot2Insts;
847 }
848
849 bool hasDot3Insts() const {
850 return HasDot3Insts;
851 }
852
853 bool hasDot4Insts() const {
854 return HasDot4Insts;
855 }
856
857 bool hasDot5Insts() const {
858 return HasDot5Insts;
859 }
860
861 bool hasDot6Insts() const {
862 return HasDot6Insts;
863 }
864
865 bool hasDot7Insts() const {
866 return HasDot7Insts;
867 }
868
869 bool hasDot8Insts() const {
870 return HasDot8Insts;
871 }
872
873 bool hasDot9Insts() const {
874 return HasDot9Insts;
875 }
876
877 bool hasDot10Insts() const {
878 return HasDot10Insts;
879 }
880
881 bool hasDot11Insts() const {
882 return HasDot11Insts;
883 }
884
885 bool hasDot12Insts() const {
886 return HasDot12Insts;
887 }
888
889 bool hasDot13Insts() const {
890 return HasDot13Insts;
891 }
892
893 bool hasMAIInsts() const {
894 return HasMAIInsts;
895 }
896
897 bool hasFP8Insts() const {
898 return HasFP8Insts;
899 }
900
902
903 bool hasCubeInsts() const { return HasCubeInsts; }
904
905 bool hasLerpInst() const { return HasLerpInst; }
906
907 bool hasSadInsts() const { return HasSadInsts; }
908
909 bool hasQsadInsts() const { return HasQsadInsts; }
910
911 bool hasCvtNormInsts() const { return HasCvtNormInsts; }
912
914
916
917 bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
918
919 bool hasPkFmacF16Inst() const {
920 return HasPkFmacF16Inst;
921 }
922
926
930
934
938
940
942
946
948
950
954
958
962
966
968
969 /// \return true if the target has flat, global, and buffer atomic fadd for
970 /// double.
974
975 /// \return true if the target's flat, global, and buffer atomic fadd for
976 /// float supports denormal handling.
980
981 /// \return true if atomic operations targeting fine-grained memory work
982 /// correctly at device scope, in allocations in host or peer PCIe device
983 /// memory.
987
988 /// \return true if the HW emulates system-scope atomics that are unsupported
989 /// by PCIe via a CAS loop.
993
995
999
1000 bool hasNoSdstCMPX() const {
1001 return HasNoSdstCMPX;
1002 }
1003
1004 bool hasVscnt() const {
1005 return HasVscnt;
1006 }
1007
1008 bool hasGetWaveIdInst() const {
1009 return HasGetWaveIdInst;
1010 }
1011
1012 bool hasSMemTimeInst() const {
1013 return HasSMemTimeInst;
1014 }
1015
1018 }
1019
1023
1024 bool hasVOP3Literal() const {
1025 return HasVOP3Literal;
1026 }
1027
1028 bool hasNoDataDepHazard() const {
1029 return HasNoDataDepHazard;
1030 }
1031
1033 return getGeneration() < SEA_ISLANDS;
1034 }
1035
1036 bool hasInstPrefetch() const {
1037 return getGeneration() == GFX10 || getGeneration() == GFX11;
1038 }
1039
1040 bool hasPrefetch() const { return GFX12Insts; }
1041
1042 bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }
1043
1045
1046 bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
1047
1048 // Has s_cmpk_* instructions.
1049 bool hasSCmpK() const { return getGeneration() < GFX12; }
1050
1051 // Scratch is allocated in 256 dword per wave blocks for the entire
1052 // wavefront. When viewed from the perspective of an arbitrary workitem, this
1053 // is 4-byte aligned.
1054 //
1055 // Only 4-byte alignment is really needed to access anything. Transformations
1056 // on the pointer value itself may rely on the alignment / known low bits of
1057 // the pointer. Set this to something above the minimum to avoid needing
1058 // dynamic realignment in common cases.
1059 Align getStackAlignment() const { return Align(16); }
1060
1061 bool enableMachineScheduler() const override {
1062 return true;
1063 }
1064
1065 bool useAA() const override;
1066
1067 bool enableSubRegLiveness() const override {
1068 return true;
1069 }
1070
1073
1074 // static wrappers
1075 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
1076
1077 // XXX - Why is this here if it isn't in the default pass set?
1078 bool enableEarlyIfConversion() const override {
1079 return true;
1080 }
1081
1083 const SchedRegion &Region) const override;
1084
1086 const SchedRegion &Region) const override;
1087
1088 void mirFileLoaded(MachineFunction &MF) const override;
1089
1090 unsigned getMaxNumUserSGPRs() const {
1091 return AMDGPU::getMaxNumUserSGPRs(*this);
1092 }
1093
1094 bool hasSMemRealTime() const {
1095 return HasSMemRealTime;
1096 }
1097
1098 bool hasMovrel() const {
1099 return HasMovrel;
1100 }
1101
1102 bool hasVGPRIndexMode() const {
1103 return HasVGPRIndexMode;
1104 }
1105
1106 bool useVGPRIndexMode() const;
1107
1109 return getGeneration() >= VOLCANIC_ISLANDS;
1110 }
1111
1113
1114 bool hasScalarStores() const {
1115 return HasScalarStores;
1116 }
1117
1118 bool hasScalarAtomics() const {
1119 return HasScalarAtomics;
1120 }
1121
1122 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1124
1125 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1126 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1127
1128 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1129 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1130
1131 bool hasDPP() const {
1132 return HasDPP;
1133 }
1134
1135 bool hasDPPBroadcasts() const {
1136 return HasDPP && getGeneration() < GFX10;
1137 }
1138
1140 return HasDPP && getGeneration() < GFX10;
1141 }
1142
1143 bool hasDPP8() const {
1144 return HasDPP8;
1145 }
1146
1147 bool hasDPALU_DPP() const {
1148 return HasDPALU_DPP;
1149 }
1150
1151 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1152
1153 bool hasPackedFP32Ops() const {
1154 return HasPackedFP32Ops;
1155 }
1156
1157 // Has V_PK_MOV_B32 opcode
1158 bool hasPkMovB32() const {
1159 return GFX90AInsts;
1160 }
1161
1163 return getGeneration() >= GFX10 || hasGFX940Insts();
1164 }
1165
1166 bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
1167
1168 bool hasImageInsts() const {
1169 return HasImageInsts;
1170 }
1171
1173 return HasExtendedImageInsts;
1174 }
1175
1176 bool hasR128A16() const {
1177 return HasR128A16;
1178 }
1179
1180 bool hasA16() const { return HasA16; }
1181
1182 bool hasG16() const { return HasG16; }
1183
1184 bool hasOffset3fBug() const {
1185 return HasOffset3fBug;
1186 }
1187
1189
1191
1192 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1193
1195
1197
1198 bool hasNSAEncoding() const { return HasNSAEncoding; }
1199
1200 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1201
1203
1204 unsigned getNSAMaxSize(bool HasSampler = false) const {
1205 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1206 }
1207
1208 bool hasGFX10_AEncoding() const {
1209 return GFX10_AEncoding;
1210 }
1211
1212 bool hasGFX10_BEncoding() const {
1213 return GFX10_BEncoding;
1214 }
1215
1216 bool hasGFX10_3Insts() const {
1217 return GFX10_3Insts;
1218 }
1219
1220 bool hasMadF16() const;
1221
1222 bool hasMovB64() const { return GFX940Insts || GFX1250Insts; }
1223
1224 bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }
1225
1226 // Scalar and global loads support scale_offset bit.
1227 bool hasScaleOffset() const { return GFX1250Insts; }
1228
1229 bool hasFlatGVSMode() const { return FlatGVSMode; }
1230
1231 // FLAT GLOBAL VOffset is signed
1232 bool hasSignedGVSOffset() const { return GFX1250Insts; }
1233
1234 bool enableSIScheduler() const {
1235 return EnableSIScheduler;
1236 }
1237
1238 bool loadStoreOptEnabled() const {
1239 return EnableLoadStoreOpt;
1240 }
1241
1242 bool hasSGPRInitBug() const {
1243 return SGPRInitBug;
1244 }
1245
1247 return UserSGPRInit16Bug && isWave32();
1248 }
1249
1251
1255
1258 }
1259
1263
1264 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1266 return CIInsts;
1267 }
1268
1271 }
1272
1277
1280 }
1281
1284 }
1285
1288 }
1289
1292 }
1293
1296 }
1297
1298 bool hasLDSMisalignedBug() const {
1299 return LDSMisalignedBug && !EnableCuMode;
1300 }
1301
1303 return HasInstFwdPrefetchBug;
1304 }
1305
1307 return HasVcmpxExecWARHazard;
1308 }
1309
1312 }
1313
1314 // Shift amount of a 64 bit shift cannot be a highest allocated register
1315 // if also at the end of the allocation block.
1317 return GFX90AInsts && !GFX940Insts;
1318 }
1319
1320 // Has one cycle hazard on transcendental instruction feeding a
1321 // non transcendental VALU.
1322 bool hasTransForwardingHazard() const { return GFX940Insts; }
1323
1324 // Has one cycle hazard on a VALU instruction partially writing dst with
1325 // a shift of result bits feeding another VALU instruction.
1327
1328 // Cannot use op_sel with v_dot instructions.
1329 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1330
1331 // Does not have HW interlocs for VALU writing and then reading SGPRs.
1332 bool hasVDecCoExecHazard() const {
1333 return GFX940Insts;
1334 }
1335
1336 bool hasNSAtoVMEMBug() const {
1337 return HasNSAtoVMEMBug;
1338 }
1339
1340 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1341
1342 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1343
1344 bool hasGFX90AInsts() const { return GFX90AInsts; }
1345
1347 return getGeneration() == GFX10;
1348 }
1349
1350 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1351
1352 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1353
1354 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1355
1357 return getGeneration() == GFX11;
1358 }
1359
1361
1363
1364 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1365
1367
1371
1372 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1373
1374 bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
1375
1377 return GFX1250Insts && getGeneration() == GFX12;
1378 }
1379
1380 /// Return if operations acting on VGPR tuples require even alignment.
1381 bool needsAlignedVGPRs() const { return RequiresAlignVGPR; }
1382
1383 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1384 bool hasSPackHL() const { return GFX11Insts; }
1385
1386 /// Return true if the target's EXP instruction has the COMPR flag, which
1387 /// affects the meaning of the EN (enable) bits.
1388 bool hasCompressedExport() const { return !GFX11Insts; }
1389
1390 /// Return true if the target's EXP instruction supports the NULL export
1391 /// target.
1392 bool hasNullExportTarget() const { return !GFX11Insts; }
1393
1394 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1395
1396 bool hasVOPDInsts() const { return HasVOPDInsts; }
1397
1399
1400 /// Return true if the target has the S_DELAY_ALU instruction.
1401 bool hasDelayAlu() const { return GFX11Insts; }
1402
1403 bool hasPackedTID() const { return HasPackedTID; }
1404
1405 // GFX94* is a derivation to GFX90A. hasGFX940Insts() being true implies that
1406 // hasGFX90AInsts is also true.
1407 bool hasGFX940Insts() const { return GFX940Insts; }
1408
1409 // GFX950 is a derivation to GFX94*. hasGFX950Insts() implies that
1410 // hasGFX940Insts and hasGFX90AInsts are also true.
1411 bool hasGFX950Insts() const { return GFX950Insts; }
1412
1413 /// Returns true if the target supports
1414 /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
1415 /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
1416 bool hasLDSLoadB96_B128() const {
1417 return hasGFX950Insts();
1418 }
1419
1420 bool hasVMemToLDSLoad() const { return HasVMemToLDSLoad; }
1421
1422 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1423
1425
1427
1429
1431
1432 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1433 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1434 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1435
1436 /// \returns true if inline constants are not supported for F16 pseudo
1437 /// scalar transcendentals.
1439 return getGeneration() == GFX12;
1440 }
1441
1442 /// \returns true if the target has instructions with xf32 format support.
1443 bool hasXF32Insts() const { return HasXF32Insts; }
1444
1445 /// \returns true if the target has packed f32 instructions that only read 32
1446 /// bits from a scalar operand (SGPR or literal) and replicates the bits to
1447 /// both channels.
1451
1452 bool hasBitOp3Insts() const { return HasBitOp3Insts; }
1453
1454 bool hasPermlane16Swap() const { return HasPermlane16Swap; }
1455 bool hasPermlane32Swap() const { return HasPermlane32Swap; }
1456 bool hasAshrPkInsts() const { return HasAshrPkInsts; }
1457
1460 }
1461
1464 }
1465
1466 bool hasMin3Max3PKF16() const { return HasMin3Max3PKF16; }
1467
1468 bool hasTanhInsts() const { return HasTanhInsts; }
1469
1471
1472 bool hasAddPC64Inst() const { return GFX1250Insts; }
1473
1475
1478 }
1479
1481
1482 /// \returns true if the target has s_wait_xcnt insertion. Supported for
1483 /// GFX1250.
1484 bool hasWaitXCnt() const { return HasWaitXcnt; }
1485
1486 // A single DWORD instructions can use a 64-bit literal.
1487 bool has64BitLiterals() const { return Has64BitLiterals; }
1488
1490
1492
1493 /// \returns The maximum number of instructions that can be enclosed in an
1494 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1495 /// instruction.
1496 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1497
1498 bool hasPrngInst() const { return HasPrngInst; }
1499
1501
1502 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1503 /// SGPRs
1504 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1505
1506 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1507 /// VGPRs
1508 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,
1509 unsigned DynamicVGPRBlockSize) const;
1510
1511 /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
1512 /// be achieved when the only function running on a CU is \p F, each workgroup
1513 /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
1514 /// NumVGPRs VGPRs. The flat workgroup sizes associated with the function are a
1515 /// range, so this returns a range as well.
1516 ///
1517 /// Note that occupancy can be affected by the scratch allocation as well, but
1518 /// we do not have enough information to compute it.
1519 std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
1520 unsigned LDSSize = 0,
1521 unsigned NumSGPRs = 0,
1522 unsigned NumVGPRs = 0) const;
1523
1524 /// \returns true if the flat_scratch register should be initialized with the
1525 /// pointer to the wave's scratch memory rather than a size and offset.
1528 }
1529
1530 /// \returns true if the flat_scratch register is initialized by the HW.
1531 /// In this case it is readonly.
1533
1534 /// \returns true if the architected SGPRs are enabled.
1536
1537 /// \returns true if Global Data Share is supported.
1538 bool hasGDS() const { return HasGDS; }
1539
1540 /// \returns true if Global Wave Sync is supported.
1541 bool hasGWS() const { return HasGWS; }
1542
1543 /// \returns true if the machine has merged shaders in which s0-s7 are
1544 /// reserved by the hardware and user SGPRs start at s8
1545 bool hasMergedShaders() const {
1546 return getGeneration() >= GFX9;
1547 }
1548
1549 // \returns true if the target supports the pre-NGG legacy geometry path.
1550 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1551
1552 // \returns true if preloading kernel arguments is supported.
1553 bool hasKernargPreload() const { return KernargPreload; }
1554
1555 // \returns true if the target has split barriers feature
1556 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1557
1558 // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1559 bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
1560
1561 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1562 // no-return form.
1564
1565 // \returns true if the target has DX10_CLAMP kernel descriptor mode bit
1566 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1567
1568 // \returns true if the target has IEEE kernel descriptor mode bit
1569 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1570
1571 // \returns true if the target has IEEE fminimum/fmaximum instructions
1573
1574 // \returns true if the target has WG_RR_MODE kernel descriptor mode bit
1575 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1576
1577 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1578 /// values.
1579 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1580
1581 bool hasGFX1250Insts() const { return GFX1250Insts; }
1582
1583 bool hasVOPD3() const { return GFX1250Insts; }
1584
1585 // \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.
1586 bool hasAddSubU64Insts() const { return HasAddSubU64Insts; }
1587
1588 // \returns true if the target has V_MAD_U32 instruction.
1589 bool hasMadU32Inst() const { return HasMadU32Inst; }
1590
1591 // \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
1592 bool hasVectorMulU64() const { return GFX1250Insts; }
1593
1594 // \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32
1595 // instructions.
1596 bool hasMadU64U32NoCarry() const { return GFX1250Insts; }
1597
1598 // \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
1599 bool hasIntMinMax64() const { return GFX1250Insts; }
1600
1601 // \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
1602 bool hasAddMinMaxInsts() const { return HasAddMinMaxInsts; }
1603
1604 // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
1606
1607 // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
1608 bool hasPkMinMax3Insts() const { return GFX1250Insts; }
1609
1610 // \returns ture if target has S_GET_SHADER_CYCLES_U64 instruction.
1611 bool hasSGetShaderCyclesInst() const { return GFX1250Insts; }
1612
1613 // \returns true if target has S_SETPRIO_INC_WG instruction.
1615
1616 // \returns true if target has S_WAKEUP_BARRIER instruction.
1617 bool hasSWakeupBarrier() const { return HasSWakeupBarrier; }
1618
1619 // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1620 // of sign-extending. Note that GFX1250 has not only fixed the bug but also
1621 // extended VA to 57 bits.
1622 bool hasGetPCZeroExtension() const { return GFX12Insts && !GFX1250Insts; }
1623
1624 // \returns true if the target needs to create a prolog for backward
1625 // compatibility when preloading kernel arguments.
1627 return hasKernargPreload() && !GFX1250Insts;
1628 }
1629
1630 bool hasCondSubInsts() const { return GFX12Insts; }
1631
1632 bool hasSubClampInsts() const { return hasGFX10_3Insts(); }
1633
1634 /// \returns SGPR allocation granularity supported by the subtarget.
1635 unsigned getSGPRAllocGranule() const {
1637 }
1638
1639 /// \returns SGPR encoding granularity supported by the subtarget.
1640 unsigned getSGPREncodingGranule() const {
1642 }
1643
1644 /// \returns Total number of SGPRs supported by the subtarget.
1645 unsigned getTotalNumSGPRs() const {
1647 }
1648
1649 /// \returns Addressable number of SGPRs supported by the subtarget.
1650 unsigned getAddressableNumSGPRs() const {
1652 }
1653
1654 /// \returns Minimum number of SGPRs that meets the given number of waves per
1655 /// execution unit requirement supported by the subtarget.
1656 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1657 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1658 }
1659
1660 /// \returns Maximum number of SGPRs that meets the given number of waves per
1661 /// execution unit requirement supported by the subtarget.
1662 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1663 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1664 }
1665
1666 /// \returns Reserved number of SGPRs. This is common
1667 /// utility function called by MachineFunction and
1668 /// Function variants of getReservedNumSGPRs.
1669 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1670 /// \returns Reserved number of SGPRs for given machine function \p MF.
1671 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1672
1673 /// \returns Reserved number of SGPRs for given function \p F.
1674 unsigned getReservedNumSGPRs(const Function &F) const;
1675
1676 /// \returns Maximum number of preloaded SGPRs for the subtarget.
1677 unsigned getMaxNumPreloadedSGPRs() const;
1678
1679 /// \returns max num SGPRs. This is the common utility
1680 /// function called by MachineFunction and Function
1681 /// variants of getMaxNumSGPRs.
1682 unsigned getBaseMaxNumSGPRs(const Function &F,
1683 std::pair<unsigned, unsigned> WavesPerEU,
1684 unsigned PreloadedSGPRs,
1685 unsigned ReservedNumSGPRs) const;
1686
1687 /// \returns Maximum number of SGPRs that meets number of waves per execution
1688 /// unit requirement for function \p MF, or number of SGPRs explicitly
1689 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1690 ///
1691 /// \returns Value that meets number of waves per execution unit requirement
1692 /// if explicitly requested value cannot be converted to integer, violates
1693 /// subtarget's specifications, or does not meet number of waves per execution
1694 /// unit requirement.
1695 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1696
1697 /// \returns Maximum number of SGPRs that meets number of waves per execution
1698 /// unit requirement for function \p F, or number of SGPRs explicitly
1699 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1700 ///
1701 /// \returns Value that meets number of waves per execution unit requirement
1702 /// if explicitly requested value cannot be converted to integer, violates
1703 /// subtarget's specifications, or does not meet number of waves per execution
1704 /// unit requirement.
1705 unsigned getMaxNumSGPRs(const Function &F) const;
1706
1707 /// \returns VGPR allocation granularity supported by the subtarget.
1708 unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {
1709 return AMDGPU::IsaInfo::getVGPRAllocGranule(this, DynamicVGPRBlockSize);
1710 }
1711
1712 /// \returns VGPR encoding granularity supported by the subtarget.
1713 unsigned getVGPREncodingGranule() const {
1715 }
1716
1717 /// \returns Total number of VGPRs supported by the subtarget.
1718 unsigned getTotalNumVGPRs() const {
1720 }
1721
1722 /// \returns Addressable number of architectural VGPRs supported by the
1723 /// subtarget.
1727
1728 /// \returns Addressable number of VGPRs supported by the subtarget.
1729 unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
1730 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
1731 }
1732
1733 /// \returns the minimum number of VGPRs that will prevent achieving more than
1734 /// the specified number of waves \p WavesPerEU.
1735 unsigned getMinNumVGPRs(unsigned WavesPerEU,
1736 unsigned DynamicVGPRBlockSize) const {
1737 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU,
1738 DynamicVGPRBlockSize);
1739 }
1740
1741 /// \returns the maximum number of VGPRs that can be used and still achieved
1742 /// at least the specified number of waves \p WavesPerEU.
1743 unsigned getMaxNumVGPRs(unsigned WavesPerEU,
1744 unsigned DynamicVGPRBlockSize) const {
1745 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU,
1746 DynamicVGPRBlockSize);
1747 }
1748
1749 /// \returns max num VGPRs. This is the common utility function
1750 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1751 unsigned
1753 std::pair<unsigned, unsigned> NumVGPRBounds) const;
1754
1755 /// \returns Maximum number of VGPRs that meets number of waves per execution
1756 /// unit requirement for function \p F, or number of VGPRs explicitly
1757 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1758 ///
1759 /// \returns Value that meets number of waves per execution unit requirement
1760 /// if explicitly requested value cannot be converted to integer, violates
1761 /// subtarget's specifications, or does not meet number of waves per execution
1762 /// unit requirement.
1763 unsigned getMaxNumVGPRs(const Function &F) const;
1764
1765 unsigned getMaxNumAGPRs(const Function &F) const {
1766 return getMaxNumVGPRs(F);
1767 }
1768
1769 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
1770 /// of waves per execution unit required for the function \p F.
1771 std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
1772
1773 /// \returns Maximum number of VGPRs that meets number of waves per execution
1774 /// unit requirement for function \p MF, or number of VGPRs explicitly
1775 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1776 ///
1777 /// \returns Value that meets number of waves per execution unit requirement
1778 /// if explicitly requested value cannot be converted to integer, violates
1779 /// subtarget's specifications, or does not meet number of waves per execution
1780 /// unit requirement.
1781 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1782
1783 bool supportsWave32() const { return getGeneration() >= GFX10; }
1784
1785 bool supportsWave64() const { return !hasGFX1250Insts(); }
1786
1787 bool isWave32() const {
1788 return getWavefrontSize() == 32;
1789 }
1790
1791 bool isWave64() const {
1792 return getWavefrontSize() == 64;
1793 }
1794
1795 /// Returns if the wavesize of this subtarget is known reliable. This is false
1796 /// only for the a default target-cpu that does not have an explicit
1797 /// +wavefrontsize target feature.
1798 bool isWaveSizeKnown() const {
1799 return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
1800 hasFeature(AMDGPU::FeatureWavefrontSize64);
1801 }
1802
1804 return getRegisterInfo()->getBoolRC();
1805 }
1806
1807 /// \returns Maximum number of work groups per compute unit supported by the
1808 /// subtarget and limited by given \p FlatWorkGroupSize.
1809 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1810 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1811 }
1812
1813 /// \returns Minimum flat work group size supported by the subtarget.
1814 unsigned getMinFlatWorkGroupSize() const override {
1816 }
1817
1818 /// \returns Maximum flat work group size supported by the subtarget.
1819 unsigned getMaxFlatWorkGroupSize() const override {
1821 }
1822
1823 /// \returns Number of waves per execution unit required to support the given
1824 /// \p FlatWorkGroupSize.
1825 unsigned
1826 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1827 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1828 }
1829
1830 /// \returns Minimum number of waves per execution unit supported by the
1831 /// subtarget.
1832 unsigned getMinWavesPerEU() const override {
1834 }
1835
1836 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1837 SDep &Dep,
1838 const TargetSchedModel *SchedModel) const override;
1839
1840 // \returns true if it's beneficial on this subtarget for the scheduler to
1841 // cluster stores as well as loads.
1842 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1843
1844 // \returns the number of address arguments from which to enable MIMG NSA
1845 // on supported architectures.
1846 unsigned getNSAThreshold(const MachineFunction &MF) const;
1847
1848 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1849 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1851
1852 // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
1853 // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
1855
1856 bool isDynamicVGPREnabled() const { return DynamicVGPR; }
1857 unsigned getDynamicVGPRBlockSize() const {
1858 return DynamicVGPRBlockSize32 ? 32 : 16;
1859 }
1860
1862 // AMDGPU doesn't care if early-clobber and undef operands are allocated
1863 // to the same register.
1864 return false;
1865 }
1866
1867 // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
1868 // and surrounded by S_WAIT_ALU(0xFFE3).
1870 return getGeneration() == GFX12;
1871 }
1872
1873 // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
1874 // read.
1876 return GFX1250Insts && getGeneration() == GFX12;
1877 }
1878
1879 // src_flat_scratch_hi cannot be used as a source in SALU producing a 64-bit
1880 // result.
1882 return GFX1250Insts && getGeneration() == GFX12;
1883 }
1884
1885 /// \returns true if the subtarget supports clusters of workgroups.
1886 bool hasClusters() const { return HasClusters; }
1887
1888 /// \returns true if the subtarget requires a wait for xcnt before VMEM
1889 /// accesses that must never be repeated in the event of a page fault/re-try.
1890 /// Atomic stores/rmw and all volatile accesses fall under this criteria.
1894
1895 /// \returns the number of significant bits in the immediate field of the
1896 /// S_NOP instruction.
1897 unsigned getSNopBits() const {
1899 return 7;
1901 return 4;
1902 return 3;
1903 }
1904
1905 /// \returns true if the sub-target supports buffer resource (V#) with 45-bit
1906 /// num_records.
1910
1914};
1915
1917public:
1918 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1919
1920 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1921
1922 bool hasDispatchPtr() const { return DispatchPtr; }
1923
1924 bool hasQueuePtr() const { return QueuePtr; }
1925
1926 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1927
1928 bool hasDispatchID() const { return DispatchID; }
1929
1930 bool hasFlatScratchInit() const { return FlatScratchInit; }
1931
1932 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1933
1934 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1935
1936 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1937
1938 unsigned getNumFreeUserSGPRs();
1939
1940 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1941
1952
1953 // Returns the size in number of SGPRs for preload user SGPR field.
1955 switch (ID) {
1957 return 2;
1959 return 4;
1960 case DispatchPtrID:
1961 return 2;
1962 case QueuePtrID:
1963 return 2;
1965 return 2;
1966 case DispatchIdID:
1967 return 2;
1968 case FlatScratchInitID:
1969 return 2;
1971 return 1;
1972 }
1973 llvm_unreachable("Unknown UserSGPRID.");
1974 }
1975
1976 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1977
1978private:
1979 const GCNSubtarget &ST;
1980
1981 // Private memory buffer
1982 // Compute directly in sgpr[0:1]
1983 // Other shaders indirect 64-bits at sgpr[0:1]
1984 bool ImplicitBufferPtr = false;
1985
1986 bool PrivateSegmentBuffer = false;
1987
1988 bool DispatchPtr = false;
1989
1990 bool QueuePtr = false;
1991
1992 bool KernargSegmentPtr = false;
1993
1994 bool DispatchID = false;
1995
1996 bool FlatScratchInit = false;
1997
1998 bool PrivateSegmentSize = false;
1999
2000 unsigned NumKernargPreloadSGPRs = 0;
2001
2002 unsigned NumUsedUserSGPRs = 0;
2003};
2004
2005} // end namespace llvm
2006
2007#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
#define F(x, y, z)
Definition MD5.cpp:54
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
bool hasMemoryAtomicFaddF32DenormalSupport() const
bool hasFlat() const
bool hasD16Images() const
bool hasMinimum3Maximum3F32() const
InstrItineraryData InstrItins
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
bool hasSDWAOmod() const
bool hasFlatGVSMode() const
bool hasPermlane32Swap() const
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
bool hasSwap() const
bool hasPkFmacF16Inst() const
bool HasAtomicFMinFMaxF64FlatInsts
bool hasPkMinMax3Insts() const
bool hasDot2Insts() const
bool hasD16LoadStore() const
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
bool hasScalarCompareEq64() const
bool has1_5xVGPRs() const
int getLDSBankCount() const
bool hasSafeCUPrefetch() const
bool hasOnlyRevVALUShifts() const
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
bool hasLdsAtomicAddF64() const
bool hasFlatLgkmVMemCountInOrder() const
bool Has45BitNumRecordsBufferResource
bool flatScratchIsPointer() const
bool hasSDWAMac() const
bool hasFP8ConversionInsts() const
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
bool hasApertureRegs() const
unsigned MaxPrivateElementSize
bool unsafeDSOffsetFoldingEnabled() const
bool hasBitOp3Insts() const
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
bool vmemWriteNeedsExpWaitcnt() const
bool hasAtomicFMinFMaxF32FlatInsts() const
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
bool hasIEEEMinimumMaximumInsts() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
bool hasGetWaveIdInst() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasFlatScratchHiInB64InstHazard() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
bool hasRelaxedBufferOOBMode() const
bool hasPkAddMinMaxInsts() const
bool hasDLInsts() const
bool hasExtendedImageInsts() const
bool hasVmemWriteVgprInOrder() const
bool hasBCNT(unsigned Size) const
unsigned getSNopBits() const
bool hasMAIInsts() const
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
bool has1024AddressableVGPRs() const
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
bool hasFlatScratchInsts() const
bool hasMultiDwordFlatScratchAddressing() const
bool hasArchitectedSGPRs() const
bool hasFmaakFmamkF64Insts() const
bool hasTanhInsts() const
bool hasHWFP64() const
bool hasScaleOffset() const
bool hasDenormModeInst() const
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
bool hasCvtScaleForwardingHazard() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
bool hasUnalignedDSAccessEnabled() const
bool hasTensorCvtLutInsts() const
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
bool hasSWakeupBarrier() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasDot1Insts() const
bool hasDot3Insts() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
bool hasCondSubInsts() const
const InlineAsmLowering * getInlineAsmLowering() const override
bool hasAutoWaitcntBeforeBarrier() const
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
unsigned getTotalNumSGPRs() const
bool hasGFX1250Insts() const
const InstrItineraryData * getInstrItineraryData() const override
bool hasSafeSmemPrefetch() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool HasShaderCyclesHiLoRegisters
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
bool privateMemoryResourceIsRangeChecked() const
bool hasScalarSubwordLoads() const
bool hasDot11Insts() const
bool enableFlatScratch() const
bool hasMadF16() const
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const
bool hasMin3Max3PKF16() const
bool hasUnalignedBufferAccess() const
bool hasR128A16() const
bool hasCvtPkNormVOP3Insts() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasPrngInst() const
bool hasSignedScratchOffsets() const
bool hasGlobalAddTidInsts() const
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
bool hasFmaMixBF16Insts() const
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool hasVMemToLDSLoad() const
bool supportsGetDoorbellID() const
bool supportsWave32() const
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
bool hasFlatAtomicFaddF32Inst() const
bool hasKernargPreload() const
bool hasFP8Insts() const
unsigned getMaxNumAGPRs(const Function &F) const
bool hasReadM0MovRelInterpHazard() const
bool isDynamicVGPREnabled() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
bool hasFmaakFmamkF32Insts() const
bool hasClusters() const
bool hasVscnt() const
bool hasMad64_32() const
InstructionSelector * getInstructionSelector() const override
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
bool hasHardClauses() const
bool useDS128() const
bool hasExtendedWaitCounts() const
bool hasBVHDualAndBVH8Insts() const
bool hasMinimum3Maximum3PKF16() const
bool hasLshlAddU64Inst() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
bool hasFmacF64Inst() const
bool RequiresWaitsBeforeSystemScopeStores
bool hasXF32Insts() const
bool hasInstPrefetch() const
bool hasAddPC64Inst() const
unsigned maxHardClauseLength() const
bool hasAshrPkInsts() const
bool isMesaGfxShader(const Function &F) const
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
bool hasDPP() const
bool hasVINTERPEncoding() const
bool hasGloballyAddressableScratch() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasAddSubU64Insts() const
bool hasLegacyGeometry() const
bool has64BitLiterals() const
TrapHandlerAbi getTrapHandlerAbi() const
bool isCuModeEnabled() const
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
bool hasUnalignedScratchAccess() const
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasMinimum3Maximum3F16() const
bool hasSDWAOutModsVOPC() const
bool hasAtomicFMinFMaxF32GlobalInsts() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
bool hasLdsBarrierArriveAtomic() const
bool hasGFX950Insts() const
bool hasCvtNormInsts() const
bool has45BitNumRecordsBufferResource() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
unsigned getMaxNumPreloadedSGPRs() const
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
bool hasNoDataDepHazard() const
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool useVGPRBlockOpsForCSR() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool hasUnalignedDSAccess() const
bool hasAddMinMaxInsts() const
bool needsKernArgPreloadProlog() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
bool hasIntClamp() const
bool hasGFX10_AEncoding() const
bool hasFP8E5M3Insts() const
bool hasFlatSegmentOffsetBug() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const
bool hasEmulatedSystemScopeAtomics() const
bool hasMadU64U32NoCarry() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
const SITargetLowering * getTargetLowering() const override
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
bool hasGFX940Insts() const
bool hasFullRate64Ops() const
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
bool enableMachineScheduler() const override
bool hasLDSFPAtomicAddF64() const
bool hasFlatGlobalInsts() const
bool HasGloballyAddressableScratch
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
bool getScalarizeGlobalBehavior() const
bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const
bool HasAtomicFMinFMaxF32FlatInsts
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasScratchBaseForwardingHazard() const
bool hasLerpInst() const
bool hasIntMinMax64() const
bool hasShaderCyclesHiLoRegisters() const
bool hasSDWASdst() const
bool HasDefaultComponentBroadcast
bool hasScalarPackInsts() const
bool hasFFBL() const
bool hasNSAEncoding() const
bool requiresDisjointEarlyClobberAndUndef() const override
bool hasVALUReadSGPRHazard() const
bool hasSMemRealTime() const
bool hasFlatAddressSpace() const
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
bool hasMovB64() const
bool hasVmemPrefInsts() const
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const
bool hasCubeInsts() const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
bool hasMed3_16() const
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasUnalignedScratchAccessEnabled() const
bool hasMovrel() const
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
bool hasBFI() const
bool hasDot13Insts() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
bool HasAtomicBufferGlobalPkAddF16Insts
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
bool hasUnalignedBufferAccessEnabled() const
bool isWaveSizeKnown() const
Returns if the wavesize of this subtarget is known reliable.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool hasFMA() const
bool hasDot10Insts() const
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
bool supportsWave64() const
bool HasAtomicBufferPkAddBF16Inst
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
bool hasFormattedMUBUFInsts() const
bool hasFlatScratchSVSMode() const
bool supportsWGP() const
bool hasG16() const
bool hasHalfRate64Ops() const
bool hasAtomicFaddInsts() const
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
bool hasSubClampInsts() const
bool hasPermlane16Swap() const
bool hasNSAtoVMEMBug() const
bool requiresWaitXCntForSingleAccessInstructions() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
bool hasSadInsts() const
bool hasMIMG_R128() const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
unsigned getMaxNumUserSGPRs() const
bool hasTransposeLoadF4F6Insts() const
bool hasMadU32Inst() const
bool hasAtomicFaddNoRtnInsts() const
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
bool HasEmulatedSystemScopeAtomics
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasSignedGVSOffset() const
bool hasCvtPkNormVOP2Insts() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
bool hasVectorMulU64() const
bool hasScalarMulHiInsts() const
bool hasSCmpK() const
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
bool requiresWaitIdleBeforeGetReg() const
bool hasPointSampleAccel() const
bool hasDot12Insts() const
bool hasDS96AndDS128() const
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Generation getGeneration() const
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
bool hasAtomicBufferGlobalPkAddF16Insts() const
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit ...
bool hasNoSdstCMPX() const
bool isXNACKEnabled() const
bool hasScalarAddSub64() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
bool enableEarlyIfConversion() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by an SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
bool hasSGetShaderCyclesInst() const
bool hasRFEHazards() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
bool hasFlatScratchSTMode() const
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
bool hasAddr64() const
bool HasAtomicGlobalPkAddBF16Inst
bool hasUnalignedAccessMode() const
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool isWave64() const
unsigned getDynamicVGPRBlockSize() const
bool hasFmaMixInsts() const
bool hasCARRY() const
bool hasQsadInsts() const
bool hasPackedTID() const
bool setRegModeNeedsVNOPs() const
bool hasFP64() const
bool hasAddNoCarry() const
bool requiresWaitsBeforeSystemScopeStores() const
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
bool hasSALUFloatInsts() const
bool EnableUnsafeDSOffsetFolding
bool hasFractBug() const
bool isPreciseMemoryEnabled() const
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
bool HasMemoryAtomicFaddF32DenormalSupport
bool hasMTBUFInsts() const
bool hasDot4Insts() const
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
bool hasWaitXCnt() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
bool hasSetPrioIncWgInst() const
~GCNSubtarget() override
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasDot9Insts() const
bool hasVOPD3() const
bool hasAtomicCSub() const
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
bool hasDefaultComponentBroadcast() const
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
bool hasBFE() const
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
bool HasFlatBufferGlobalAtomicFaddF64Inst
static unsigned getNumUserSGPRForField(UserSGPRID ID)
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
unsigned getNumKernargPreloadSGPRs() const
unsigned getNumUsedUserSGPRs() const
GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST)
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition ScheduleDAG.h:51
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant bit, stopping at the first 1.
Definition bit.h:236
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
A region of an MBB for scheduling.