LLVM 23.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI selector. The enumerators visible here are NONE (0x00)
/// and AMDHSA (0x01).
40 enum class TrapHandlerAbi {
41 NONE = 0x00,
42 AMDHSA = 0x01,
43 };
44
45 enum class TrapID {
48 };
49
50private:
51 /// SelectionDAGISel related APIs.
52 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
53
54 /// GlobalISel related APIs.
55 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
56 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
57 std::unique_ptr<InstructionSelector> InstSelector;
58 std::unique_ptr<LegalizerInfo> Legalizer;
59 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
60
61protected:
62 // Basic subtarget description.
64 unsigned Gen = INVALID;
66 int LDSBankCount = 0;
68
69 // Possibly statically set by tablegen, but may want to be overridden.
70 bool FastDenormalF32 = false;
71 bool HalfRate64Ops = false;
72 bool FullRate64Ops = false;
73
74 // Dynamically set bits that enable features.
75 bool FlatForGlobal = false;
77 bool BackOffBarrier = false;
79 bool UnalignedAccessMode = false;
81 bool HasApertureRegs = false;
82 bool SupportsXNACK = false;
83 bool KernargPreload = false;
84
85 // This should not be used directly. 'TargetID' tracks the dynamic settings
86 // for XNACK.
87 bool EnableXNACK = false;
88
89 bool EnableTgSplit = false;
90 bool EnableCuMode = false;
91 bool TrapHandler = false;
92 bool EnablePreciseMemory = false;
93
94 // Used as options.
95 bool EnableLoadStoreOpt = false;
97 bool EnableSIScheduler = false;
98 bool EnableDS128 = false;
99 bool EnablePRTStrictNull = false;
100 bool DumpCode = false;
102
103 // Subtarget properties statically set by tablegen
104 bool FP64 = false;
105 bool FMA = false;
106 bool MIMG_R128 = false;
107 bool CIInsts = false;
108 bool GFX8Insts = false;
109 bool GFX9Insts = false;
110 bool GFX90AInsts = false;
111 bool GFX940Insts = false;
112 bool GFX950Insts = false;
113 bool GFX10Insts = false;
114 bool GFX11Insts = false;
115 bool GFX12Insts = false;
116 bool GFX1250Insts = false;
117 bool GFX10_3Insts = false;
118 bool GFX7GFX8GFX9Insts = false;
119 bool SGPRInitBug = false;
120 bool UserSGPRInit16Bug = false;
123 bool HasSMemRealTime = false;
124 bool HasIntClamp = false;
125 bool HasFmaMixInsts = false;
126 bool HasFmaMixBF16Insts = false;
127 bool HasMovrel = false;
128 bool HasVGPRIndexMode = false;
130 bool HasScalarStores = false;
131 bool HasScalarAtomics = false;
132 bool HasSDWAOmod = false;
133 bool HasSDWAScalar = false;
134 bool HasSDWASdst = false;
135 bool HasSDWAMac = false;
136 bool HasSDWAOutModsVOPC = false;
137 bool HasDPP = false;
138 bool HasDPP8 = false;
139 bool HasDPALU_DPP = false;
140 bool HasDPPSrc1SGPR = false;
141 bool HasPackedFP32Ops = false;
142 bool HasImageInsts = false;
144 bool HasR128A16 = false;
145 bool HasA16 = false;
146 bool HasG16 = false;
147 bool HasNSAEncoding = false;
149 bool GFX10_AEncoding = false;
150 bool GFX10_BEncoding = false;
151 bool HasDLInsts = false;
152 bool HasFmacF64Inst = false;
153 bool HasDot1Insts = false;
154 bool HasDot2Insts = false;
155 bool HasDot3Insts = false;
156 bool HasDot4Insts = false;
157 bool HasDot5Insts = false;
158 bool HasDot6Insts = false;
159 bool HasDot7Insts = false;
160 bool HasDot8Insts = false;
161 bool HasDot9Insts = false;
162 bool HasDot10Insts = false;
163 bool HasDot11Insts = false;
164 bool HasDot12Insts = false;
165 bool HasDot13Insts = false;
166 bool HasMAIInsts = false;
167 bool HasFP8Insts = false;
169 bool HasMcastLoadInsts = false;
170 bool HasCubeInsts = false;
171 bool HasLerpInst = false;
172 bool HasSadInsts = false;
173 bool HasQsadInsts = false;
174 bool HasCvtNormInsts = false;
177 bool HasFP8E5M3Insts = false;
178 bool HasCvtFP8Vop1Bug = false;
179 bool HasPkFmacF16Inst = false;
200 bool HasXF32Insts = false;
201 /// The maximum number of instructions that may be placed within an S_CLAUSE,
202 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
203 /// indicates a lack of S_CLAUSE support.
205 bool SupportsSRAMECC = false;
206 bool DynamicVGPR = false;
208 bool HasVMemToLDSLoad = false;
209 bool RequiresAlignVGPR = false;
210
211 // This should not be used directly. 'TargetID' tracks the dynamic settings
212 // for SRAMECC.
213 bool EnableSRAMECC = false;
214
215 bool HasNoSdstCMPX = false;
216 bool HasVscnt = false;
217 bool HasWaitXcnt = false;
218 bool HasGetWaveIdInst = false;
219 bool HasSMemTimeInst = false;
222 bool HasVOP3Literal = false;
223 bool HasNoDataDepHazard = false;
224 bool FlatAddressSpace = false;
225 bool FlatInstOffsets = false;
226 bool FlatGlobalInsts = false;
227 bool FlatScratchInsts = false;
228 bool FlatGVSMode = false;
231 bool EnableFlatScratch = false;
233 bool HasGDS = false;
234 bool HasGWS = false;
235 bool AddNoCarryInsts = false;
236 bool HasUnpackedD16VMem = false;
237 bool LDSMisalignedBug = false;
240 bool UnalignedDSAccess = false;
241 bool HasPackedTID = false;
242 bool ScalarizeGlobal = false;
243 bool HasSALUFloatInsts = false;
246 bool Has64BitLiterals = false;
249 bool HasBitOp3Insts = false;
250 bool HasTanhInsts = false;
253 bool HasPrngInst = false;
255 bool HasPermlane16Swap = false;
256 bool HasPermlane32Swap = false;
261 bool HasVmemPrefInsts = false;
263 bool HasSafeCUPrefetch = false;
266 bool HasNSAtoVMEMBug = false;
267 bool HasNSAClauseBug = false;
268 bool HasOffset3fBug = false;
274 bool Has1_5xVGPRs = false;
275 bool HasMADIntraFwdBug = false;
276 bool HasVOPDInsts = false;
280 bool HasAshrPkInsts = false;
284 bool HasMin3Max3PKF16 = false;
286 bool HasLshlAddU64Inst = false;
287 bool HasAddSubU64Insts = false;
288 bool HasMadU32Inst = false;
289 bool HasAddMinMaxInsts = false;
294 bool HasSWakeupBarrier = false;
295
296 bool RequiresCOV6 = false;
299
301
302 bool HasClusters = false;
304 bool UseAddPC64Inst = false;
305
306 // Dummy feature to use for assembler in tablegen.
307 bool FeatureDisable = false;
308
309private:
310 SIInstrInfo InstrInfo;
311 SITargetLowering TLInfo;
312 SIFrameLowering FrameLowering;
313
314public:
315 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
316 const GCNTargetMachine &TM);
317 ~GCNSubtarget() override;
318
320 StringRef GPU, StringRef FS);
321
322 /// Diagnose inconsistent subtarget features before attempting to codegen
323 /// function \p F.
324 void checkSubtargetFeatures(const Function &F) const;
325
/// \returns a pointer to the SIInstrInfo member (InstrInfo) embedded in this
/// subtarget.
326 const SIInstrInfo *getInstrInfo() const override {
327 return &InstrInfo;
328 }
329
/// \returns a pointer to the SIFrameLowering member (FrameLowering) embedded
/// in this subtarget.
330 const SIFrameLowering *getFrameLowering() const override {
331 return &FrameLowering;
332 }
333
/// \returns a pointer to the SITargetLowering member (TLInfo) embedded in
/// this subtarget.
334 const SITargetLowering *getTargetLowering() const override {
335 return &TLInfo;
336 }
337
/// \returns the address of the register info exposed by
/// InstrInfo.getRegisterInfo(); this class holds no separate register-info
/// member in the declarations visible here.
338 const SIRegisterInfo *getRegisterInfo() const override {
339 return &InstrInfo.getRegisterInfo();
340 }
341
342 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
343
/// \returns the non-owning pointer held by the CallLoweringInfo unique_ptr.
/// NOTE(review): where CallLoweringInfo is populated is not visible in this
/// header; confirm whether callers can observe a null pointer.
344 const CallLowering *getCallLowering() const override {
345 return CallLoweringInfo.get();
346 }
347
/// \returns the non-owning pointer held by the InlineAsmLoweringInfo
/// unique_ptr.
348 const InlineAsmLowering *getInlineAsmLowering() const override {
349 return InlineAsmLoweringInfo.get();
350 }
351
353 return InstSelector.get();
354 }
355
/// \returns the non-owning pointer held by the Legalizer unique_ptr.
356 const LegalizerInfo *getLegalizerInfo() const override {
357 return Legalizer.get();
358 }
359
/// \returns the non-owning pointer held by the RegBankInfo unique_ptr.
360 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
361 return RegBankInfo.get();
362 }
363
365 return TargetID;
366 }
367
369 return &InstrItins;
370 }
371
373
375 return (Generation)Gen;
376 }
377
/// \returns the largest per-wave scratch size the size field can express for
/// the current generation, computed as (unit size in dwords * 4) * (largest
/// value of the field). Three cases are distinguished: GFX12 and above, GFX11
/// exactly, and everything else.
/// NOTE(review): the `* 4` presumably converts dwords to bytes — confirm.
378 unsigned getMaxWaveScratchSize() const {
379 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
380 if (getGeneration() >= GFX12) {
381 // 18-bit field in units of 64-dword.
382 return (64 * 4) * ((1 << 18) - 1);
383 }
384 if (getGeneration() == GFX11) {
385 // 15-bit field in units of 64-dword.
386 return (64 * 4) * ((1 << 15) - 1);
387 }
388 // 13-bit field in units of 256-dword.
389 return (256 * 4) * ((1 << 13) - 1);
390 }
391
392 /// Return the number of high bits known to be zero for a frame index.
396
/// \returns the LDSBankCount member, which is default-initialized to 0 in
/// this class.
397 int getLDSBankCount() const {
398 return LDSBankCount;
399 }
400
/// \returns MaxPrivateElementSize when \p ForBufferRSrc is set or when
/// enableFlatScratch() is false; otherwise the constant 16.
401 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
402 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
403 }
404
405 unsigned getConstantBusLimit(unsigned Opcode) const;
406
407 /// \returns true if an instruction that produces a 16-bit result in a
408 /// 32-bit register implicitly zeroes the high 16 bits, rather than
409 /// preserving the original value.
410 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
411
/// \returns false whenever GFX1250Insts is set; otherwise true exactly when
/// the generation compares at or above GFX10.
412 bool supportsWGP() const {
413 if (GFX1250Insts)
414 return false;
415 return getGeneration() >= GFX10;
416 }
417
/// \returns the HasIntClamp feature flag.
418 bool hasIntClamp() const {
419 return HasIntClamp;
420 }
421
/// \returns the FP64 feature flag.
422 bool hasFP64() const {
423 return FP64;
424 }
425
/// \returns the MIMG_R128 feature flag.
426 bool hasMIMG_R128() const {
427 return MIMG_R128;
428 }
429
/// \returns the FP64 feature flag; this reads the same member as hasFP64().
430 bool hasHWFP64() const {
431 return FP64;
432 }
433
/// \returns the HalfRate64Ops flag.
434 bool hasHalfRate64Ops() const {
435 return HalfRate64Ops;
436 }
437
/// \returns the FullRate64Ops flag.
438 bool hasFullRate64Ops() const {
439 return FullRate64Ops;
440 }
441
442 bool hasAddr64() const {
444 }
445
446 bool hasFlat() const {
448 }
449
450 // Return true if the target only has the reverse operand versions of VALU
451 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
452 bool hasOnlyRevVALUShifts() const {
454 }
455
456 bool hasFractBug() const {
458 }
459
460 bool hasMed3_16() const {
462 }
463
464 bool hasMin3Max3_16() const {
466 }
467
/// \returns the HasFmaMixInsts feature flag.
468 bool hasFmaMixInsts() const {
469 return HasFmaMixInsts;
470 }
471
/// \returns the HasFmaMixBF16Insts feature flag.
472 bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; }
473
/// \returns the FMA feature flag.
474 bool hasFMA() const {
475 return FMA;
476 }
477
// The next three queries are all keyed on the GFX9Insts flag.
478 bool hasSwap() const {
479 return GFX9Insts;
480 }
481
482 bool hasScalarPackInsts() const {
483 return GFX9Insts;
484 }
485
486 bool hasScalarMulHiInsts() const {
487 return GFX9Insts;
488 }
489
/// \returns true when the generation compares at or above GFX12.
490 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
491
495
497 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
498 return getGeneration() >= GFX9;
499 }
500
501 /// True if the offset field of DS instructions works as expected. On SI, the
502 /// offset uses a 16-bit adder and does not always wrap properly.
503 bool hasUsableDSOffset() const {
504 return getGeneration() >= SEA_ISLANDS;
505 }
506
510
511 /// Condition output from div_scale is usable.
515
516 /// Extra wait hazard is needed in some cases before
517 /// s_cbranch_vccnz/s_cbranch_vccz.
518 bool hasReadVCCZBug() const {
519 return getGeneration() <= SEA_ISLANDS;
520 }
521
522 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
524 return getGeneration() >= GFX10;
525 }
526
527 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
528 /// was written by a VALU instruction.
531 }
532
533 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
534 /// SGPR was written by a VALU Instruction.
537 }
538
539 bool hasRFEHazards() const {
541 }
542
543 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
544 unsigned getSetRegWaitStates() const {
// 1 for generations comparing at or below SEA_ISLANDS, 2 for all others.
545 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
546 }
547
/// \returns the DumpCode option flag.
548 bool dumpCode() const {
549 return DumpCode;
550 }
551
552 /// Return the amount of LDS that can be used that will not restrict the
553 /// occupancy lower than WaveCount.
554 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
555 const Function &) const;
556
559 }
560
561 /// \returns If target supports S_DENORM_MODE.
562 bool hasDenormModeInst() const {
564 }
565
/// \returns the FlatForGlobal flag (declared among the dynamically set
/// feature bits of this class).
566 bool useFlatForGlobal() const {
567 return FlatForGlobal;
568 }
569
570 /// \returns If target supports ds_read/write_b128 and user enables generation
571 /// of ds_read/write_b128.
572 bool useDS128() const {
573 return CIInsts && EnableDS128;
574 }
575
576 /// \return If target supports ds_read/write_b96/128.
577 bool hasDS96AndDS128() const {
578 return CIInsts;
579 }
580
581 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
582 bool haveRoundOpsF64() const {
583 return CIInsts;
584 }
585
586 /// \returns If MUBUF instructions always perform range checking, even for
587 /// buffer resources used for private memory access.
591
592 /// \returns If target requires PRT Strict NULL support (zero result registers
593 /// for sparse texture support).
594 bool usePRTStrictNull() const {
595 return EnablePRTStrictNull;
596 }
597
601
602 /// \returns true if the target supports backing off of s_barrier instructions
603 /// when an exception is raised.
605 return BackOffBarrier;
606 }
607
610 }
611
615
/// \returns the UnalignedDSAccess feature flag.
616 bool hasUnalignedDSAccess() const {
617 return UnalignedDSAccess;
618 }
619
623
626 }
627
631
633 return UnalignedAccessMode;
634 }
635
637
/// \returns the HasApertureRegs flag.
638 bool hasApertureRegs() const {
639 return HasApertureRegs;
640 }
641
/// \returns the TrapHandler flag.
642 bool isTrapHandlerEnabled() const {
643 return TrapHandler;
644 }
645
/// \returns TargetID.isXnackOnOrAny(). The EnableXNACK member is not read
/// here; its declaration notes that TargetID tracks the dynamic setting.
646 bool isXNACKEnabled() const {
647 return TargetID.isXnackOnOrAny();
648 }
649
/// \returns the EnableTgSplit flag.
650 bool isTgSplitEnabled() const {
651 return EnableTgSplit;
652 }
653
/// \returns the EnableCuMode flag.
654 bool isCuModeEnabled() const {
655 return EnableCuMode;
656 }
657
659
/// \returns the FlatAddressSpace feature flag.
660 bool hasFlatAddressSpace() const {
661 return FlatAddressSpace;
662 }
663
/// \returns the same answer as hasFlatAddressSpace().
664 bool hasFlatScrRegister() const {
665 return hasFlatAddressSpace();
666 }
667
/// \returns the FlatInstOffsets feature flag.
668 bool hasFlatInstOffsets() const {
669 return FlatInstOffsets;
670 }
671
/// \returns the FlatGlobalInsts feature flag.
672 bool hasFlatGlobalInsts() const {
673 return FlatGlobalInsts;
674 }
675
/// \returns the FlatScratchInsts feature flag.
676 bool hasFlatScratchInsts() const {
677 return FlatScratchInsts;
678 }
679
680 // Check if target supports ST addressing mode with FLAT scratch instructions.
681 // The ST addressing mode means no registers are used, either VGPR or SGPR,
682 // but only immediate offset is swizzled and added to the FLAT scratch base.
683 bool hasFlatScratchSTMode() const {
685 }
686
687 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
688
691 }
692
693 bool enableFlatScratch() const {
694 return flatScratchIsArchitected() ||
696 }
697
// The next two queries are both keyed on the GFX10_BEncoding flag.
698 bool hasGlobalAddTidInsts() const {
699 return GFX10_BEncoding;
700 }
701
702 bool hasAtomicCSub() const {
703 return GFX10_BEncoding;
704 }
705
/// \returns true unless hasGFX1250Insts() reports true.
706 bool hasMTBUFInsts() const { return !hasGFX1250Insts(); }
707
/// \returns true unless hasGFX1250Insts() reports true.
708 bool hasFormattedMUBUFInsts() const { return !hasGFX1250Insts(); }
709
/// \returns true only when neither hasGFX940Insts() nor hasGFX1250Insts()
/// reports true.
710 bool hasExportInsts() const {
711 return !hasGFX940Insts() && !hasGFX1250Insts();
712 }
713
/// \returns true when GFX11Insts is set and hasGFX1250Insts() is false.
714 bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
715
716 // DS_ADD_F64/DS_ADD_RTN_F64
/// \returns true when either hasGFX90AInsts() or hasGFX1250Insts() is true.
717 bool hasLdsAtomicAddF64() const {
718 return hasGFX90AInsts() || hasGFX1250Insts();
719 }
720
722 return getGeneration() >= GFX9;
723 }
724
727 }
728
730 return getGeneration() > GFX9;
731 }
732
733 bool hasD16LoadStore() const {
734 return getGeneration() >= GFX9;
735 }
736
738 return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
739 }
740
741 bool hasD16Images() const {
743 }
744
745 /// Return if most LDS instructions have an m0 use that requires m0 to be
746 /// initialized.
747 bool ldsRequiresM0Init() const {
748 return getGeneration() < GFX9;
749 }
750
751 // True if the hardware rewinds and replays GWS operations if a wave is
752 // preempted.
753 //
754 // If this is false, a GWS operation requires testing if a nack set the
755 // MEM_VIOL bit, and repeating if so.
756 bool hasGWSAutoReplay() const {
757 return getGeneration() >= GFX9;
758 }
759
760 /// \returns if target has ds_gws_sema_release_all instruction.
761 bool hasGWSSemaReleaseAll() const {
762 return CIInsts;
763 }
764
765 /// \returns true if the target has integer add/sub instructions that do not
766 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
767 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
768 /// for saturation.
769 bool hasAddNoCarry() const {
770 return AddNoCarryInsts;
771 }
772
773 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
774
775 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
776
777 bool hasUnpackedD16VMem() const {
778 return HasUnpackedD16VMem;
779 }
780
781 // Covers VS/PS/CS graphics shaders
/// \returns true when isMesa3DOS() holds and AMDGPU::isShader() accepts the
/// calling convention of \p F.
782 bool isMesaGfxShader(const Function &F) const {
783 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
784 }
785
/// \returns true when the generation compares at or above SEA_ISLANDS.
786 bool hasMad64_32() const {
787 return getGeneration() >= SEA_ISLANDS;
788 }
789
// SDWA capability accessors: each one returns the like-named HasSDWA* member
// unchanged.
790 bool hasSDWAOmod() const {
791 return HasSDWAOmod;
792 }
793
794 bool hasSDWAScalar() const {
795 return HasSDWAScalar;
796 }
797
798 bool hasSDWASdst() const {
799 return HasSDWASdst;
800 }
801
802 bool hasSDWAMac() const {
803 return HasSDWAMac;
804 }
805
806 bool hasSDWAOutModsVOPC() const {
807 return HasSDWAOutModsVOPC;
808 }
809
/// \returns the HasDLInsts feature flag.
810 bool hasDLInsts() const {
811 return HasDLInsts;
812 }
813
/// \returns the HasFmacF64Inst feature flag.
814 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
815
// Accessors for the HasDot1Insts .. HasDot13Insts feature flags. Each
// hasDotNInsts() returns the matching HasDotNInsts member unchanged.
816 bool hasDot1Insts() const {
817 return HasDot1Insts;
818 }
819
820 bool hasDot2Insts() const {
821 return HasDot2Insts;
822 }
823
824 bool hasDot3Insts() const {
825 return HasDot3Insts;
826 }
827
828 bool hasDot4Insts() const {
829 return HasDot4Insts;
830 }
831
832 bool hasDot5Insts() const {
833 return HasDot5Insts;
834 }
835
836 bool hasDot6Insts() const {
837 return HasDot6Insts;
838 }
839
840 bool hasDot7Insts() const {
841 return HasDot7Insts;
842 }
843
844 bool hasDot8Insts() const {
845 return HasDot8Insts;
846 }
847
848 bool hasDot9Insts() const {
849 return HasDot9Insts;
850 }
851
852 bool hasDot10Insts() const {
853 return HasDot10Insts;
854 }
855
856 bool hasDot11Insts() const {
857 return HasDot11Insts;
858 }
859
860 bool hasDot12Insts() const {
861 return HasDot12Insts;
862 }
863
864 bool hasDot13Insts() const {
865 return HasDot13Insts;
866 }
867
/// \returns the HasMAIInsts feature flag.
868 bool hasMAIInsts() const {
869 return HasMAIInsts;
870 }
871
/// \returns the HasFP8Insts feature flag.
872 bool hasFP8Insts() const {
873 return HasFP8Insts;
874 }
875
877
// Plain feature-flag accessors: each returns the like-named Has* member
// unchanged.
878 bool hasMcastLoadInsts() const { return HasMcastLoadInsts; }
879
880 bool hasCubeInsts() const { return HasCubeInsts; }
881
882 bool hasLerpInst() const { return HasLerpInst; }
883
884 bool hasSadInsts() const { return HasSadInsts; }
885
886 bool hasQsadInsts() const { return HasQsadInsts; }
887
888 bool hasCvtNormInsts() const { return HasCvtNormInsts; }
889
891
893
894 bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
895
896 bool hasPkFmacF16Inst() const {
897 return HasPkFmacF16Inst;
898 }
899
903
907
911
915
917
919
923
925
927
931
935
939
943
945
946 /// \return true if the target has flat, global, and buffer atomic fadd for
947 /// double.
951
952 /// \return true if the target's flat, global, and buffer atomic fadd for
953 /// float supports denormal handling.
957
958 /// \return true if atomic operations targeting fine-grained memory work
959 /// correctly at device scope, in allocations in host or peer PCIe device
960 /// memory.
964
965 /// \return true if HW emulates system scope atomics unsupported by PCI-e
966 /// via a CAS loop.
970
972
976
/// \returns the HasNoSdstCMPX feature flag.
977 bool hasNoSdstCMPX() const {
978 return HasNoSdstCMPX;
979 }
980
/// \returns the HasVscnt feature flag.
981 bool hasVscnt() const {
982 return HasVscnt;
983 }
984
/// \returns the HasGetWaveIdInst feature flag.
985 bool hasGetWaveIdInst() const {
986 return HasGetWaveIdInst;
987 }
988
/// \returns the HasSMemTimeInst feature flag.
989 bool hasSMemTimeInst() const {
990 return HasSMemTimeInst;
991 }
992
995 }
996
1000
/// \returns the HasVOP3Literal feature flag.
1001 bool hasVOP3Literal() const {
1002 return HasVOP3Literal;
1003 }
1004
/// \returns the HasNoDataDepHazard feature flag.
1005 bool hasNoDataDepHazard() const {
1006 return HasNoDataDepHazard;
1007 }
1008
1010 return getGeneration() < SEA_ISLANDS;
1011 }
1012
/// \returns true only for the GFX10 and GFX11 generations; every other
/// generation value yields false.
1013 bool hasInstPrefetch() const {
1014 return getGeneration() == GFX10 || getGeneration() == GFX11;
1015 }
1016
1017 bool hasPrefetch() const { return GFX12Insts; }
1018
1019 bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }
1020
1022
1023 bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
1024
1025 // Has s_cmpk_* instructions.
1026 bool hasSCmpK() const { return getGeneration() < GFX12; }
1027
1028 // Scratch is allocated in 256 dword per wave blocks for the entire
1029 // wavefront. When viewed from the perspective of an arbitrary workitem, this
1030 // is 4-byte aligned.
1031 //
1032 // Only 4-byte alignment is really needed to access anything. Transformations
1033 // on the pointer value itself may rely on the alignment / known low bits of
1034 // the pointer. Set this to something above the minimum to avoid needing
1035 // dynamic realignment in common cases.
1036 Align getStackAlignment() const { return Align(16); }
1037
/// Override that unconditionally returns true.
1038 bool enableMachineScheduler() const override {
1039 return true;
1040 }
1041
1042 bool useAA() const override;
1043
/// Override that unconditionally returns true.
1044 bool enableSubRegLiveness() const override {
1045 return true;
1046 }
1047
1050
1051 // static wrappers
1052 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
1053
1054 // XXX - Why is this here if it isn't in the default pass set?
1055 bool enableEarlyIfConversion() const override {
1056 return true;
1057 }
1058
1060 const SchedRegion &Region) const override;
1061
1063 const SchedRegion &Region) const override;
1064
1065 void mirFileLoaded(MachineFunction &MF) const override;
1066
/// \returns the value of AMDGPU::getMaxNumUserSGPRs() evaluated for this
/// subtarget.
1067 unsigned getMaxNumUserSGPRs() const {
1068 return AMDGPU::getMaxNumUserSGPRs(*this);
1069 }
1070
/// \returns the HasSMemRealTime feature flag.
1071 bool hasSMemRealTime() const {
1072 return HasSMemRealTime;
1073 }
1074
/// \returns the HasMovrel feature flag.
1075 bool hasMovrel() const {
1076 return HasMovrel;
1077 }
1078
/// \returns the HasVGPRIndexMode feature flag.
1079 bool hasVGPRIndexMode() const {
1080 return HasVGPRIndexMode;
1081 }
1082
1083 bool useVGPRIndexMode() const;
1084
1086 return getGeneration() >= VOLCANIC_ISLANDS;
1087 }
1088
1090
/// \returns the HasScalarStores feature flag.
1091 bool hasScalarStores() const {
1092 return HasScalarStores;
1093 }
1094
/// \returns the HasScalarAtomics feature flag.
1095 bool hasScalarAtomics() const {
1096 return HasScalarAtomics;
1097 }
1098
1099 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1101
1102 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1103 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1104
1105 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1106 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1107
/// \returns the HasDPP feature flag.
1108 bool hasDPP() const {
1109 return HasDPP;
1110 }
1111
/// \returns true when HasDPP is set and the generation compares below GFX10.
1112 bool hasDPPBroadcasts() const {
1113 return HasDPP && getGeneration() < GFX10;
1114 }
1115
1117 return HasDPP && getGeneration() < GFX10;
1118 }
1119
/// \returns the HasDPP8 feature flag.
1120 bool hasDPP8() const {
1121 return HasDPP8;
1122 }
1123
/// \returns the HasDPALU_DPP feature flag.
1124 bool hasDPALU_DPP() const {
1125 return HasDPALU_DPP;
1126 }
1127
/// \returns the HasDPPSrc1SGPR feature flag.
1128 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1129
/// \returns the HasPackedFP32Ops feature flag.
1130 bool hasPackedFP32Ops() const {
1131 return HasPackedFP32Ops;
1132 }
1133
1134 // Has V_PK_MOV_B32 opcode
1135 bool hasPkMovB32() const {
1136 return GFX90AInsts;
1137 }
1138
1140 return getGeneration() >= GFX10 || hasGFX940Insts();
1141 }
1142
1143 bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
1144
1145 bool hasImageInsts() const {
1146 return HasImageInsts;
1147 }
1148
1150 return HasExtendedImageInsts;
1151 }
1152
1153 bool hasR128A16() const {
1154 return HasR128A16;
1155 }
1156
1157 bool hasA16() const { return HasA16; }
1158
1159 bool hasG16() const { return HasG16; }
1160
1161 bool hasOffset3fBug() const {
1162 return HasOffset3fBug;
1163 }
1164
1166
1168
1169 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1170
1172
1174
1175 bool hasNSAEncoding() const { return HasNSAEncoding; }
1176
1177 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1178
1180
/// \returns AMDGPU::getNSAMaxSize() evaluated for this subtarget, forwarding
/// \p HasSampler (default false) unchanged.
1181 unsigned getNSAMaxSize(bool HasSampler = false) const {
1182 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1183 }
1184
/// \returns the GFX10_AEncoding feature flag.
1185 bool hasGFX10_AEncoding() const {
1186 return GFX10_AEncoding;
1187 }
1188
/// \returns the GFX10_BEncoding feature flag.
1189 bool hasGFX10_BEncoding() const {
1190 return GFX10_BEncoding;
1191 }
1192
/// \returns the GFX10_3Insts feature flag.
1193 bool hasGFX10_3Insts() const {
1194 return GFX10_3Insts;
1195 }
1196
1197 bool hasMadF16() const;
1198
1199 bool hasMovB64() const { return GFX940Insts || GFX1250Insts; }
1200
1201 bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }
1202
1203 // Scalar and global loads support scale_offset bit.
1204 bool hasScaleOffset() const { return GFX1250Insts; }
1205
1206 bool hasFlatGVSMode() const { return FlatGVSMode; }
1207
1208 // FLAT GLOBAL VOffset is signed
1209 bool hasSignedGVSOffset() const { return GFX1250Insts; }
1210
/// \returns the EnableSIScheduler option flag.
1211 bool enableSIScheduler() const {
1212 return EnableSIScheduler;
1213 }
1214
/// \returns the EnableLoadStoreOpt option flag.
1215 bool loadStoreOptEnabled() const {
1216 return EnableLoadStoreOpt;
1217 }
1218
/// \returns the SGPRInitBug flag.
1219 bool hasSGPRInitBug() const {
1220 return SGPRInitBug;
1221 }
1222
1224 return UserSGPRInit16Bug && isWave32();
1225 }
1226
1228
1232
1235 }
1236
1240
1241 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1243 return CIInsts;
1244 }
1245
1248 }
1249
1254
1257 }
1258
1261 }
1262
1265 }
1266
1269 }
1270
1273 }
1274
/// \returns true when the LDSMisalignedBug flag is set and EnableCuMode is
/// off; with EnableCuMode on the result is always false.
1275 bool hasLDSMisalignedBug() const {
1276 return LDSMisalignedBug && !EnableCuMode;
1277 }
1278
1280 return HasInstFwdPrefetchBug;
1281 }
1282
1284 return HasVcmpxExecWARHazard;
1285 }
1286
1289 }
1290
1291 // Shift amount of a 64 bit shift cannot be the highest allocated register
1292 // if also at the end of the allocation block.
1294 return GFX90AInsts && !GFX940Insts;
1295 }
1296
1297 // Has one cycle hazard on transcendental instruction feeding a
1298 // non transcendental VALU.
1299 bool hasTransForwardingHazard() const { return GFX940Insts; }
1300
1301 // Has one cycle hazard on a VALU instruction partially writing dst with
1302 // a shift of result bits feeding another VALU instruction.
1304
1305 // Cannot use op_sel with v_dot instructions.
1306 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1307
1308 // Does not have HW interlocks for VALU writing and then reading SGPRs.
1309 bool hasVDecCoExecHazard() const {
1310 return GFX940Insts;
1311 }
1312
1313 bool hasNSAtoVMEMBug() const {
1314 return HasNSAtoVMEMBug;
1315 }
1316
1317 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1318
1319 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1320
1321 bool hasGFX90AInsts() const { return GFX90AInsts; }
1322
1324 return getGeneration() == GFX10;
1325 }
1326
1327 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1328
1329 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1330
1331 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1332
1334 return getGeneration() == GFX11;
1335 }
1336
1338
1340
1341 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1342
1344
1348
1349 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1350
1351 bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
1352
1354 return GFX1250Insts && getGeneration() == GFX12;
1355 }
1356
1357 /// Return if operations acting on VGPR tuples require even alignment.
1358 bool needsAlignedVGPRs() const { return RequiresAlignVGPR; }
1359
1360 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1361 bool hasSPackHL() const { return GFX11Insts; }
1362
1363 /// Return true if the target's EXP instruction has the COMPR flag, which
1364 /// affects the meaning of the EN (enable) bits.
1365 bool hasCompressedExport() const { return !GFX11Insts; }
1366
1367 /// Return true if the target's EXP instruction supports the NULL export
1368 /// target.
1369 bool hasNullExportTarget() const { return !GFX11Insts; }
1370
1371 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1372
1373 bool hasVOPDInsts() const { return HasVOPDInsts; }
1374
1376
1377 /// Return true if the target has the S_DELAY_ALU instruction.
1378 bool hasDelayAlu() const { return GFX11Insts; }
1379
1380 bool hasPackedTID() const { return HasPackedTID; }
1381
1382 // GFX94* is a derivative of GFX90A. hasGFX940Insts() being true implies that
1383 // hasGFX90AInsts is also true.
1384 bool hasGFX940Insts() const { return GFX940Insts; }
1385
1386 // GFX950 is a derivative of GFX94*. hasGFX950Insts() implies that
1387 // hasGFX940Insts and hasGFX90AInsts are also true.
1388 bool hasGFX950Insts() const { return GFX950Insts; }
1389
1390 /// Returns true if the target supports
1391 /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
1392 /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
1393 bool hasLDSLoadB96_B128() const {
1394 return hasGFX950Insts();
1395 }
1396
1397 bool hasVMemToLDSLoad() const { return HasVMemToLDSLoad; }
1398
1399 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1400
1402
1404
1406
1408
1409 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1410 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1411 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1412
1413 /// \returns true if inline constants are not supported for F16 pseudo
1414 /// scalar transcendentals.
1416 return getGeneration() == GFX12;
1417 }
1418
1419 /// \returns true if the target has instructions with xf32 format support.
1420 bool hasXF32Insts() const { return HasXF32Insts; }
1421
1422 /// \returns true if the target has packed f32 instructions that only read 32
1423 /// bits from a scalar operand (SGPR or literal) and replicates the bits to
1424 /// both channels.
1428
1429 bool hasBitOp3Insts() const { return HasBitOp3Insts; }
1430
1431 bool hasPermlane16Swap() const { return HasPermlane16Swap; }
1432 bool hasPermlane32Swap() const { return HasPermlane32Swap; }
1433 bool hasAshrPkInsts() const { return HasAshrPkInsts; }
1434
1437 }
1438
1441 }
1442
1443 bool hasMin3Max3PKF16() const { return HasMin3Max3PKF16; }
1444
1445 bool hasTanhInsts() const { return HasTanhInsts; }
1446
1448
1449 bool hasAddPC64Inst() const { return GFX1250Insts; }
1450
1451 bool useAddPC64Inst() const { return UseAddPC64Inst; }
1452
1454
1456
1459 }
1460
1462
1463 /// \returns true if the target supports expert scheduling mode 2 which relies
1464 /// on the compiler to insert waits to avoid hazards between VMEM and VALU
1465 /// instructions in some instances.
1466 bool hasExpertSchedulingMode() const { return getGeneration() >= GFX12; }
1467
1468 /// \returns true if the target has s_wait_xcnt insertion. Supported for
1469 /// GFX1250.
1470 bool hasWaitXCnt() const { return HasWaitXcnt; }
1471
1472 // A single-DWORD instruction can use a 64-bit literal.
1473 bool has64BitLiterals() const { return Has64BitLiterals; }
1474
1476
1478
1479 /// \returns The maximum number of instructions that can be enclosed in an
1480 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1481 /// instruction.
1482 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1483
1484 bool hasPrngInst() const { return HasPrngInst; }
1485
1487
1488 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1489 /// SGPRs
1490 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1491
1492 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1493 /// VGPRs
1494 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,
1495 unsigned DynamicVGPRBlockSize) const;
1496
1497 /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
1498 /// be achieved when the only function running on a CU is \p F, each workgroup
1499 /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
1500 /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a
1501 /// range, so this returns a range as well.
1502 ///
1503 /// Note that occupancy can be affected by the scratch allocation as well, but
1504 /// we do not have enough information to compute it.
1505 std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
1506 unsigned LDSSize = 0,
1507 unsigned NumSGPRs = 0,
1508 unsigned NumVGPRs = 0) const;
1509
1510 /// \returns true if the flat_scratch register should be initialized with the
1511 /// pointer to the wave's scratch memory rather than a size and offset.
1514 }
1515
1516 /// \returns true if the flat_scratch register is initialized by the HW.
1517 /// In this case it is readonly.
1519
1520 /// \returns true if the architected SGPRs are enabled.
1522
1523 /// \returns true if Global Data Share is supported.
1524 bool hasGDS() const { return HasGDS; }
1525
1526 /// \returns true if Global Wave Sync is supported.
1527 bool hasGWS() const { return HasGWS; }
1528
1529 /// \returns true if the machine has merged shaders, in which s0-s7 are
1530 /// reserved by the hardware and user SGPRs start at s8 (GFX9 and later).
1531 bool hasMergedShaders() const {
1532 return getGeneration() >= GFX9;
1533 }
1534
1535 /// \returns true if the target supports the pre-NGG legacy geometry path.
1536 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1537
1538 /// \returns true if preloading kernel arguments is supported.
1539 bool hasKernargPreload() const { return KernargPreload; }
1540
1541 /// \returns true if the target has the split barriers feature.
1542 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1543
1544 /// \returns true if the FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1545 bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
1546
1547 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1548 // no-return form.
1550
1551 /// \returns true if the target has the DX10_CLAMP kernel descriptor mode bit.
1552 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1553
1554 /// \returns true if the target has the IEEE kernel descriptor mode bit.
1555 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1556
1557 // \returns true if the target has IEEE fminimum/fmaximum instructions
1559
1560 /// \returns true if the target has the WG_RR_MODE kernel descriptor mode bit.
1561 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1562
1563 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1564 /// values.
1565 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1566
1567 bool hasGFX1250Insts() const { return GFX1250Insts; }
1568
1570
1571 bool hasVOPD3() const { return GFX1250Insts; }
1572
1573 /// \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.
1574 bool hasAddSubU64Insts() const { return HasAddSubU64Insts; }
1575
1576 /// \returns true if the target has the V_MAD_U32 instruction.
1577 bool hasMadU32Inst() const { return HasMadU32Inst; }
1578
1579 /// \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
1580 bool hasVectorMulU64() const { return GFX1250Insts; }
1581
1582 /// \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32
1583 /// instructions.
1584 bool hasMadU64U32NoCarry() const { return GFX1250Insts; }
1585
1586 /// \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
1587 bool hasIntMinMax64() const { return GFX1250Insts; }
1588
1589 /// \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
1590 bool hasAddMinMaxInsts() const { return HasAddMinMaxInsts; }
1591
1592 // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
1594
1595 /// \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
1596 bool hasPkMinMax3Insts() const { return GFX1250Insts; }
1597
1598 /// \returns true if the target has the S_GET_SHADER_CYCLES_U64 instruction.
1599 bool hasSGetShaderCyclesInst() const { return GFX1250Insts; }
1600
1601 // \returns true if target has S_SETPRIO_INC_WG instruction.
1603
1604 /// \returns true if the target has the S_WAKEUP_BARRIER instruction.
1605 bool hasSWakeupBarrier() const { return HasSWakeupBarrier; }
1606
1607 /// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1608 /// of sign-extending. Note that GFX1250 has not only fixed the bug but also
1609 /// extended VA to 57 bits.
1610 bool hasGetPCZeroExtension() const { return GFX12Insts && !GFX1250Insts; }
1611
1612 // \returns true if the target needs to create a prolog for backward
1613 // compatibility when preloading kernel arguments.
1615 return hasKernargPreload() && !GFX1250Insts;
1616 }
1617
1618 bool hasCondSubInsts() const { return GFX12Insts; }
1619
1620 bool hasSubClampInsts() const { return hasGFX10_3Insts(); }
1621
1622 /// \returns SGPR allocation granularity supported by the subtarget.
1623 unsigned getSGPRAllocGranule() const {
1625 }
1626
1627 /// \returns SGPR encoding granularity supported by the subtarget.
1628 unsigned getSGPREncodingGranule() const {
1630 }
1631
1632 /// \returns Total number of SGPRs supported by the subtarget.
1633 unsigned getTotalNumSGPRs() const {
1635 }
1636
1637 /// \returns Addressable number of SGPRs supported by the subtarget.
1638 unsigned getAddressableNumSGPRs() const {
1640 }
1641
1642 /// \returns Minimum number of SGPRs that meets the given number of waves per
1643 /// execution unit requirement supported by the subtarget.
1644 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1645 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1646 }
1647
1648 /// \returns Maximum number of SGPRs that meets the given number of waves per
1649 /// execution unit requirement supported by the subtarget.
1650 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1651 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1652 }
1653
1654 /// \returns Reserved number of SGPRs. This is common
1655 /// utility function called by MachineFunction and
1656 /// Function variants of getReservedNumSGPRs.
1657 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1658 /// \returns Reserved number of SGPRs for given machine function \p MF.
1659 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1660
1661 /// \returns Reserved number of SGPRs for given function \p F.
1662 unsigned getReservedNumSGPRs(const Function &F) const;
1663
1664 /// \returns Maximum number of preloaded SGPRs for the subtarget.
1665 unsigned getMaxNumPreloadedSGPRs() const;
1666
1667 /// \returns max num SGPRs. This is the common utility
1668 /// function called by MachineFunction and Function
1669 /// variants of getMaxNumSGPRs.
1670 unsigned getBaseMaxNumSGPRs(const Function &F,
1671 std::pair<unsigned, unsigned> WavesPerEU,
1672 unsigned PreloadedSGPRs,
1673 unsigned ReservedNumSGPRs) const;
1674
1675 /// \returns Maximum number of SGPRs that meets number of waves per execution
1676 /// unit requirement for function \p MF, or number of SGPRs explicitly
1677 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1678 ///
1679 /// \returns Value that meets number of waves per execution unit requirement
1680 /// if explicitly requested value cannot be converted to integer, violates
1681 /// subtarget's specifications, or does not meet number of waves per execution
1682 /// unit requirement.
1683 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1684
1685 /// \returns Maximum number of SGPRs that meets number of waves per execution
1686 /// unit requirement for function \p F, or number of SGPRs explicitly
1687 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1688 ///
1689 /// \returns Value that meets number of waves per execution unit requirement
1690 /// if explicitly requested value cannot be converted to integer, violates
1691 /// subtarget's specifications, or does not meet number of waves per execution
1692 /// unit requirement.
1693 unsigned getMaxNumSGPRs(const Function &F) const;
1694
1695 /// \returns VGPR allocation granularity supported by the subtarget.
1696 unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {
1697 return AMDGPU::IsaInfo::getVGPRAllocGranule(this, DynamicVGPRBlockSize);
1698 }
1699
1700 /// \returns VGPR encoding granularity supported by the subtarget.
1701 unsigned getVGPREncodingGranule() const {
1703 }
1704
1705 /// \returns Total number of VGPRs supported by the subtarget.
1706 unsigned getTotalNumVGPRs() const {
1708 }
1709
1710 /// \returns Addressable number of architectural VGPRs supported by the
1711 /// subtarget.
1715
1716 /// \returns Addressable number of VGPRs supported by the subtarget.
1717 unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
1718 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
1719 }
1720
1721 /// \returns the minimum number of VGPRs that will prevent achieving more than
1722 /// the specified number of waves \p WavesPerEU.
1723 unsigned getMinNumVGPRs(unsigned WavesPerEU,
1724 unsigned DynamicVGPRBlockSize) const {
1725 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU,
1726 DynamicVGPRBlockSize);
1727 }
1728
1729 /// \returns the maximum number of VGPRs that can be used and still achieve
1730 /// at least the specified number of waves \p WavesPerEU.
1731 unsigned getMaxNumVGPRs(unsigned WavesPerEU,
1732 unsigned DynamicVGPRBlockSize) const {
1733 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU,
1734 DynamicVGPRBlockSize);
1735 }
1736
1737 /// \returns max num VGPRs. This is the common utility function
1738 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1739 unsigned
1741 std::pair<unsigned, unsigned> NumVGPRBounds) const;
1742
1743 /// \returns Maximum number of VGPRs that meets number of waves per execution
1744 /// unit requirement for function \p F, or number of VGPRs explicitly
1745 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1746 ///
1747 /// \returns Value that meets number of waves per execution unit requirement
1748 /// if explicitly requested value cannot be converted to integer, violates
1749 /// subtarget's specifications, or does not meet number of waves per execution
1750 /// unit requirement.
1751 unsigned getMaxNumVGPRs(const Function &F) const;
1752
1753 unsigned getMaxNumAGPRs(const Function &F) const {
1754 return getMaxNumVGPRs(F);
1755 }
1756
1757 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
1758 /// of waves per execution unit required for the function \p F.
1759 std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
1760
1761 /// \returns Maximum number of VGPRs that meets number of waves per execution
1762 /// unit requirement for function \p MF, or number of VGPRs explicitly
1763 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1764 ///
1765 /// \returns Value that meets number of waves per execution unit requirement
1766 /// if explicitly requested value cannot be converted to integer, violates
1767 /// subtarget's specifications, or does not meet number of waves per execution
1768 /// unit requirement.
1769 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1770
1771 bool supportsWave32() const { return getGeneration() >= GFX10; }
1772
1773 bool supportsWave64() const { return !hasGFX1250Insts(); }
1774
1775 bool isWave32() const {
1776 return getWavefrontSize() == 32;
1777 }
1778
1779 bool isWave64() const {
1780 return getWavefrontSize() == 64;
1781 }
1782
1783 /// \returns true if the wavefront size of this subtarget is reliably known,
1784 /// i.e. FeatureWavefrontSize32 or FeatureWavefrontSize64 is set. This is false
1785 /// only for a default target-cpu without an explicit +wavefrontsize feature.
1786 bool isWaveSizeKnown() const {
1787 return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
1788 hasFeature(AMDGPU::FeatureWavefrontSize64);
1789 }
1790
1792 return getRegisterInfo()->getBoolRC();
1793 }
1794
1795 /// \returns Maximum number of work groups per compute unit supported by the
1796 /// subtarget and limited by given \p FlatWorkGroupSize.
1797 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1798 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1799 }
1800
1801 /// \returns Minimum flat work group size supported by the subtarget.
1802 unsigned getMinFlatWorkGroupSize() const override {
1804 }
1805
1806 /// \returns Maximum flat work group size supported by the subtarget.
1807 unsigned getMaxFlatWorkGroupSize() const override {
1809 }
1810
1811 /// \returns Number of waves per execution unit required to support the given
1812 /// \p FlatWorkGroupSize.
1813 unsigned
1814 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1815 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1816 }
1817
1818 /// \returns Minimum number of waves per execution unit supported by the
1819 /// subtarget.
1820 unsigned getMinWavesPerEU() const override {
1822 }
1823
1824 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1825 SDep &Dep,
1826 const TargetSchedModel *SchedModel) const override;
1827
1828 /// \returns true if it's beneficial on this subtarget for the scheduler to
1829 /// cluster stores as well as loads.
1830 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1831
1832 /// \returns the number of address arguments from which to enable MIMG NSA
1833 /// on supported architectures.
1834 unsigned getNSAThreshold(const MachineFunction &MF) const;
1835
1836 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1837 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1839
1840 // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
1841 // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
1843
1844 bool isDynamicVGPREnabled() const { return DynamicVGPR; }
1845 unsigned getDynamicVGPRBlockSize() const {
1846 return DynamicVGPRBlockSize32 ? 32 : 16;
1847 }
1848
1850 // AMDGPU doesn't care if early-clobber and undef operands are allocated
1851 // to the same register.
1852 return false;
1853 }
1854
1855 // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 must not be claused with anything
1856 // and must be surrounded by S_WAIT_ALU(0xFFE3).
1858 return getGeneration() == GFX12;
1859 }
1860
1861 // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
1862 // read.
1864 return GFX1250Insts && getGeneration() == GFX12;
1865 }
1866
1867 // src_flat_scratch_hi cannot be used as a source in SALU producing a 64-bit
1868 // result.
1870 return GFX1250Insts && getGeneration() == GFX12;
1871 }
1872
1873 /// \returns true if the subtarget supports clusters of workgroups.
1874 bool hasClusters() const { return HasClusters; }
1875
1876 /// \returns true if the subtarget requires a wait for xcnt before VMEM
1877 /// accesses that must never be repeated in the event of a page fault/re-try.
1878 /// Atomic stores/rmw and all volatile accesses fall under these criteria.
1882
1883 /// \returns the number of significant bits in the immediate field of the
1884 /// S_NOP instruction.
1885 unsigned getSNopBits() const {
1887 return 7;
1889 return 4;
1890 return 3;
1891 }
1892
1893 /// \returns true if the sub-target supports buffer resource (V#) with 45-bit
1894 /// num_records.
1898
1902
1906
1908 return (getGeneration() <= AMDGPUSubtarget::GFX9 ||
1910 isWave32();
1911 }
1912};
1913
1915public:
1916 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1917
1918 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1919
1920 bool hasDispatchPtr() const { return DispatchPtr; }
1921
1922 bool hasQueuePtr() const { return QueuePtr; }
1923
1924 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1925
1926 bool hasDispatchID() const { return DispatchID; }
1927
1928 bool hasFlatScratchInit() const { return FlatScratchInit; }
1929
1930 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1931
1932 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1933
1934 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1935
1936 unsigned getNumFreeUserSGPRs();
1937
1938 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1939
1950
1951 // Returns the size in number of SGPRs for preload user SGPR field.
1953 switch (ID) {
1955 return 2;
1957 return 4;
1958 case DispatchPtrID:
1959 return 2;
1960 case QueuePtrID:
1961 return 2;
1963 return 2;
1964 case DispatchIdID:
1965 return 2;
1966 case FlatScratchInitID:
1967 return 2;
1969 return 1;
1970 }
1971 llvm_unreachable("Unknown UserSGPRID.");
1972 }
1973
1974 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1975
1976private:
1977 const GCNSubtarget &ST;
1978
1979 // Private memory buffer
1980 // Compute directly in sgpr[0:1]
1981 // Other shaders indirect 64-bits at sgpr[0:1]
1982 bool ImplicitBufferPtr = false;
1983
1984 bool PrivateSegmentBuffer = false;
1985
1986 bool DispatchPtr = false;
1987
1988 bool QueuePtr = false;
1989
1990 bool KernargSegmentPtr = false;
1991
1992 bool DispatchID = false;
1993
1994 bool FlatScratchInit = false;
1995
1996 bool PrivateSegmentSize = false;
1997
1998 unsigned NumKernargPreloadSGPRs = 0;
1999
2000 unsigned NumUsedUserSGPRs = 0;
2001};
2002
2003} // end namespace llvm
2004
2005#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
#define F(x, y, z)
Definition MD5.cpp:54
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
bool hasMemoryAtomicFaddF32DenormalSupport() const
bool hasFlat() const
bool hasD16Images() const
bool hasMinimum3Maximum3F32() const
InstrItineraryData InstrItins
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
bool hasSDWAOmod() const
bool hasFlatGVSMode() const
bool hasPermlane32Swap() const
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
bool hasSwap() const
bool hasPkFmacF16Inst() const
bool HasAtomicFMinFMaxF64FlatInsts
bool hasPkMinMax3Insts() const
bool hasDot2Insts() const
bool hasD16LoadStore() const
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
bool hasScalarCompareEq64() const
bool has1_5xVGPRs() const
int getLDSBankCount() const
bool hasSafeCUPrefetch() const
bool hasOnlyRevVALUShifts() const
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
bool hasExpertSchedulingMode() const
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
bool hasLdsAtomicAddF64() const
bool hasFlatLgkmVMemCountInOrder() const
bool Has45BitNumRecordsBufferResource
bool flatScratchIsPointer() const
bool hasSDWAMac() const
bool hasFP8ConversionInsts() const
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
bool hasApertureRegs() const
unsigned MaxPrivateElementSize
bool unsafeDSOffsetFoldingEnabled() const
bool hasBitOp3Insts() const
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
bool vmemWriteNeedsExpWaitcnt() const
bool hasAtomicFMinFMaxF32FlatInsts() const
bool shouldClusterStores() const
bool useAddPC64Inst() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
bool hasIEEEMinimumMaximumInsts() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
bool hasGetWaveIdInst() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasFlatScratchHiInB64InstHazard() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
bool hasRelaxedBufferOOBMode() const
bool hasPkAddMinMaxInsts() const
bool hasDLInsts() const
bool hasExtendedImageInsts() const
bool hasVmemWriteVgprInOrder() const
unsigned getSNopBits() const
bool hasMAIInsts() const
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
bool has1024AddressableVGPRs() const
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
bool hasFlatScratchInsts() const
bool hasMultiDwordFlatScratchAddressing() const
bool hasArchitectedSGPRs() const
bool hasFmaakFmamkF64Insts() const
bool hasTanhInsts() const
bool hasHWFP64() const
bool hasScaleOffset() const
bool hasDenormModeInst() const
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
bool hasCvtScaleForwardingHazard() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
bool hasUnalignedDSAccessEnabled() const
bool hasTensorCvtLutInsts() const
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
bool hasSWakeupBarrier() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasDot1Insts() const
bool hasDot3Insts() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
bool hasCondSubInsts() const
const InlineAsmLowering * getInlineAsmLowering() const override
bool hasAutoWaitcntBeforeBarrier() const
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
unsigned getTotalNumSGPRs() const
bool hasGFX1250Insts() const
const InstrItineraryData * getInstrItineraryData() const override
bool hasSafeSmemPrefetch() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool HasShaderCyclesHiLoRegisters
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
bool privateMemoryResourceIsRangeChecked() const
bool hasScalarSubwordLoads() const
bool hasDot11Insts() const
bool enableFlatScratch() const
bool hasMadF16() const
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const
bool hasMin3Max3PKF16() const
bool hasUnalignedBufferAccess() const
bool hasR128A16() const
bool hasCvtPkNormVOP3Insts() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasPrngInst() const
bool hasSignedScratchOffsets() const
bool hasGlobalAddTidInsts() const
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
bool hasFmaMixBF16Insts() const
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool hasVMemToLDSLoad() const
bool supportsGetDoorbellID() const
bool supportsWave32() const
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
bool hasFlatAtomicFaddF32Inst() const
bool hasKernargPreload() const
bool hasFP8Insts() const
unsigned getMaxNumAGPRs(const Function &F) const
bool hasReadM0MovRelInterpHazard() const
bool isDynamicVGPREnabled() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
bool hasFmaakFmamkF32Insts() const
bool hasClusters() const
bool hasVscnt() const
bool hasMad64_32() const
bool hasSetregVGPRMSBFixup() const
InstructionSelector * getInstructionSelector() const override
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
bool hasHardClauses() const
bool useDS128() const
bool hasExtendedWaitCounts() const
bool hasBVHDualAndBVH8Insts() const
bool hasMinimum3Maximum3PKF16() const
bool hasLshlAddU64Inst() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
bool hasFmacF64Inst() const
bool RequiresWaitsBeforeSystemScopeStores
bool hasXF32Insts() const
bool hasInstPrefetch() const
bool hasAddPC64Inst() const
unsigned maxHardClauseLength() const
bool hasAshrPkInsts() const
bool isMesaGfxShader(const Function &F) const
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
bool hasDPP() const
bool hasVINTERPEncoding() const
bool hasGloballyAddressableScratch() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasAddSubU64Insts() const
bool hasLegacyGeometry() const
bool has64BitLiterals() const
TrapHandlerAbi getTrapHandlerAbi() const
bool isCuModeEnabled() const
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
bool hasUnalignedScratchAccess() const
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasMinimum3Maximum3F16() const
bool hasSDWAOutModsVOPC() const
bool hasAtomicFMinFMaxF32GlobalInsts() const
bool hasMcastLoadInsts() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
bool hasLdsBarrierArriveAtomic() const
bool hasGFX950Insts() const
bool hasCvtNormInsts() const
bool has45BitNumRecordsBufferResource() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
unsigned getMaxNumPreloadedSGPRs() const
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
bool hasNoDataDepHazard() const
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool useVGPRBlockOpsForCSR() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool hasUnalignedDSAccess() const
bool hasAddMinMaxInsts() const
bool needsKernArgPreloadProlog() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
bool hasIntClamp() const
bool hasGFX10_AEncoding() const
bool hasFP8E5M3Insts() const
bool hasFlatSegmentOffsetBug() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const
bool hasEmulatedSystemScopeAtomics() const
bool hasMadU64U32NoCarry() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
const SITargetLowering * getTargetLowering() const override
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
bool hasGFX940Insts() const
bool hasFullRate64Ops() const
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
bool enableMachineScheduler() const override
bool hasLDSFPAtomicAddF64() const
bool hasFlatGlobalInsts() const
bool HasGloballyAddressableScratch
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
bool getScalarizeGlobalBehavior() const
bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const
bool HasAtomicFMinFMaxF32FlatInsts
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasScratchBaseForwardingHazard() const
bool hasLerpInst() const
bool hasIntMinMax64() const
bool hasShaderCyclesHiLoRegisters() const
bool hasSDWASdst() const
bool HasDefaultComponentBroadcast
bool hasScalarPackInsts() const
bool hasNSAEncoding() const
bool requiresDisjointEarlyClobberAndUndef() const override
bool hasVALUReadSGPRHazard() const
bool hasSMemRealTime() const
bool hasFlatAddressSpace() const
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
bool hasMovB64() const
bool hasVmemPrefInsts() const
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const
bool supportsWaveWideBPermute() const
bool hasCubeInsts() const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
bool hasMed3_16() const
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasUnalignedScratchAccessEnabled() const
bool hasMovrel() const
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
bool hasDot13Insts() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
bool HasAtomicBufferGlobalPkAddF16Insts
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
bool hasUnalignedBufferAccessEnabled() const
bool isWaveSizeKnown() const
Returns if the wavesize of this subtarget is known reliable.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool hasFMA() const
bool hasDot10Insts() const
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
bool supportsWave64() const
bool HasAtomicBufferPkAddBF16Inst
bool hasNegativeUnalignedScratchOffsetBug() const
bool supportsBPermute() const
bool hasFormattedMUBUFInsts() const
bool hasFlatScratchSVSMode() const
bool supportsWGP() const
bool hasG16() const
bool hasHalfRate64Ops() const
bool hasAtomicFaddInsts() const
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
bool hasSubClampInsts() const
bool hasPermlane16Swap() const
bool hasNSAtoVMEMBug() const
bool requiresWaitXCntForSingleAccessInstructions() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
bool hasSadInsts() const
bool hasMIMG_R128() const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
unsigned getMaxNumUserSGPRs() const
bool hasTransposeLoadF4F6Insts() const
bool hasMadU32Inst() const
bool hasAtomicFaddNoRtnInsts() const
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
bool HasEmulatedSystemScopeAtomics
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasSignedGVSOffset() const
bool hasCvtPkNormVOP2Insts() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
bool hasVectorMulU64() const
bool hasScalarMulHiInsts() const
bool hasSCmpK() const
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
bool requiresWaitIdleBeforeGetReg() const
bool hasPointSampleAccel() const
bool hasDot12Insts() const
bool hasDS96AndDS128() const
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Generation getGeneration() const
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
bool hasAtomicBufferGlobalPkAddF16Insts() const
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit ...
bool hasNoSdstCMPX() const
bool isXNACKEnabled() const
bool hasScalarAddSub64() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
bool enableEarlyIfConversion() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
bool hasSGetShaderCyclesInst() const
bool hasINVWBL2WaitCntRequirement() const
bool hasRFEHazards() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
bool hasFlatScratchSTMode() const
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
bool hasAddr64() const
bool HasAtomicGlobalPkAddBF16Inst
bool hasUnalignedAccessMode() const
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool isWave64() const
unsigned getDynamicVGPRBlockSize() const
bool hasFmaMixInsts() const
bool hasQsadInsts() const
bool hasPackedTID() const
bool setRegModeNeedsVNOPs() const
bool hasFP64() const
bool hasAddNoCarry() const
bool requiresWaitsBeforeSystemScopeStores() const
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
bool hasSALUFloatInsts() const
bool EnableUnsafeDSOffsetFolding
bool hasFractBug() const
bool isPreciseMemoryEnabled() const
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
bool HasMemoryAtomicFaddF32DenormalSupport
bool hasMTBUFInsts() const
bool hasDot4Insts() const
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
bool hasWaitXCnt() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
bool hasSetPrioIncWgInst() const
~GCNSubtarget() override
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasDot9Insts() const
bool hasVOPD3() const
bool hasAtomicCSub() const
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
bool hasDefaultComponentBroadcast() const
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
bool HasFlatBufferGlobalAtomicFaddF64Inst
static unsigned getNumUserSGPRForField(UserSGPRID ID)
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
unsigned getNumKernargPreloadSGPRs() const
unsigned getNumUsedUserSGPRs() const
GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST)
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition ScheduleDAG.h:51
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least, stopping at the first 1.
Definition bit.h:236
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
A region of an MBB for scheduling.