LLVM 19.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
26
27#define GET_SUBTARGETINFO_HEADER
28#include "AMDGPUGenSubtargetInfo.inc"
29
30namespace llvm {
31
32class GCNTargetMachine;
33
35 public AMDGPUSubtarget {
36public:
38
39 // Following 2 enums are documented at:
40 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
41 enum class TrapHandlerAbi {
42 NONE = 0x00,
43 AMDHSA = 0x01,
44 };
45
46 enum class TrapID {
47 LLVMAMDHSATrap = 0x02,
49 };
50
51private:
52 /// GlobalISel related APIs.
53 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
54 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
55 std::unique_ptr<InstructionSelector> InstSelector;
56 std::unique_ptr<LegalizerInfo> Legalizer;
57 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
58
59protected:
60 // Basic subtarget description.
63 unsigned Gen = INVALID;
65 int LDSBankCount = 0;
67
68 // Possibly statically set by tablegen, but may want to be overridden.
69 bool FastDenormalF32 = false;
70 bool HalfRate64Ops = false;
71 bool FullRate64Ops = false;
72
73 // Dynamically set bits that enable features.
74 bool FlatForGlobal = false;
76 bool BackOffBarrier = false;
78 bool UnalignedAccessMode = false;
79 bool HasApertureRegs = false;
80 bool SupportsXNACK = false;
81 bool KernargPreload = false;
82
83 // This should not be used directly. 'TargetID' tracks the dynamic settings
84 // for XNACK.
85 bool EnableXNACK = false;
86
87 bool EnableTgSplit = false;
88 bool EnableCuMode = false;
89 bool TrapHandler = false;
90 bool EnablePreciseMemory = false;
91
92 // Used as options.
93 bool EnableLoadStoreOpt = false;
95 bool EnableSIScheduler = false;
96 bool EnableDS128 = false;
97 bool EnablePRTStrictNull = false;
98 bool DumpCode = false;
99
100 // Subtarget properties statically set by tablegen.
101 bool FP64 = false;
102 bool FMA = false;
103 bool MIMG_R128 = false;
104 bool CIInsts = false;
105 bool GFX8Insts = false;
106 bool GFX9Insts = false;
107 bool GFX90AInsts = false;
108 bool GFX940Insts = false;
109 bool GFX10Insts = false;
110 bool GFX11Insts = false;
111 bool GFX12Insts = false;
112 bool GFX10_3Insts = false;
113 bool GFX7GFX8GFX9Insts = false;
114 bool SGPRInitBug = false;
115 bool UserSGPRInit16Bug = false;
118 bool HasSMemRealTime = false;
119 bool HasIntClamp = false;
120 bool HasFmaMixInsts = false;
121 bool HasMovrel = false;
122 bool HasVGPRIndexMode = false;
124 bool HasScalarStores = false;
125 bool HasScalarAtomics = false;
126 bool HasSDWAOmod = false;
127 bool HasSDWAScalar = false;
128 bool HasSDWASdst = false;
129 bool HasSDWAMac = false;
130 bool HasSDWAOutModsVOPC = false;
131 bool HasDPP = false;
132 bool HasDPP8 = false;
133 bool HasDPALU_DPP = false;
134 bool HasDPPSrc1SGPR = false;
135 bool HasPackedFP32Ops = false;
136 bool HasImageInsts = false;
138 bool HasR128A16 = false;
139 bool HasA16 = false;
140 bool HasG16 = false;
141 bool HasNSAEncoding = false;
143 bool GFX10_AEncoding = false;
144 bool GFX10_BEncoding = false;
145 bool HasDLInsts = false;
146 bool HasFmacF64Inst = false;
147 bool HasDot1Insts = false;
148 bool HasDot2Insts = false;
149 bool HasDot3Insts = false;
150 bool HasDot4Insts = false;
151 bool HasDot5Insts = false;
152 bool HasDot6Insts = false;
153 bool HasDot7Insts = false;
154 bool HasDot8Insts = false;
155 bool HasDot9Insts = false;
156 bool HasDot10Insts = false;
157 bool HasDot11Insts = false;
158 bool HasMAIInsts = false;
159 bool HasFP8Insts = false;
161 bool HasPkFmacF16Inst = false;
173 /// The maximum number of instructions that may be placed within an S_CLAUSE,
174 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
175 /// indicates a lack of S_CLAUSE support.
177 bool SupportsSRAMECC = false;
178
179 // This should not be used directly. 'TargetID' tracks the dynamic settings
180 // for SRAMECC.
181 bool EnableSRAMECC = false;
182
183 bool HasNoSdstCMPX = false;
184 bool HasVscnt = false;
185 bool HasGetWaveIdInst = false;
186 bool HasSMemTimeInst = false;
189 bool HasVOP3Literal = false;
190 bool HasNoDataDepHazard = false;
191 bool FlatAddressSpace = false;
192 bool FlatInstOffsets = false;
193 bool FlatGlobalInsts = false;
194 bool FlatScratchInsts = false;
197 bool EnableFlatScratch = false;
199 bool HasGDS = false;
200 bool HasGWS = false;
201 bool AddNoCarryInsts = false;
202 bool HasUnpackedD16VMem = false;
203 bool LDSMisalignedBug = false;
206 bool UnalignedDSAccess = false;
207 bool HasPackedTID = false;
208 bool ScalarizeGlobal = false;
209 bool HasSALUFloatInsts = false;
213
220 bool HasNSAtoVMEMBug = false;
221 bool HasNSAClauseBug = false;
222 bool HasOffset3fBug = false;
228 bool Has1_5xVGPRs = false;
229 bool HasMADIntraFwdBug = false;
230 bool HasVOPDInsts = false;
233
234 bool RequiresCOV6 = false;
235
236 // Dummy feature to use for assembler in tablegen.
237 bool FeatureDisable = false;
238
240private:
241 SIInstrInfo InstrInfo;
242 SITargetLowering TLInfo;
243 SIFrameLowering FrameLowering;
244
245public:
246 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
247 const GCNTargetMachine &TM);
248 ~GCNSubtarget() override;
249
251 StringRef GPU, StringRef FS);
252
/// Returns a pointer to the SIInstrInfo object owned by this subtarget (the
/// InstrInfo member).
253 const SIInstrInfo *getInstrInfo() const override {
254 return &InstrInfo;
255 }
256
257 const SIFrameLowering *getFrameLowering() const override {
258 return &FrameLowering;
259 }
260
261 const SITargetLowering *getTargetLowering() const override {
262 return &TLInfo;
263 }
264
/// Returns the address of the register info object exposed by
/// InstrInfo.getRegisterInfo(); the register info is reached through the
/// instruction info member.
265 const SIRegisterInfo *getRegisterInfo() const override {
266 return &InstrInfo.getRegisterInfo();
267 }
268
/// GlobalISel accessor: returns the raw pointer held by the CallLoweringInfo
/// unique_ptr. Ownership stays with the subtarget.
269 const CallLowering *getCallLowering() const override {
270 return CallLoweringInfo.get();
271 }
272
273 const InlineAsmLowering *getInlineAsmLowering() const override {
274 return InlineAsmLoweringInfo.get();
275 }
276
278 return InstSelector.get();
279 }
280
281 const LegalizerInfo *getLegalizerInfo() const override {
282 return Legalizer.get();
283 }
284
285 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
286 return RegBankInfo.get();
287 }
288
290 return TargetID;
291 }
292
293 // Nothing implemented, just prevent crashes on use.
295 return &TSInfo;
296 }
297
299 return &InstrItins;
300 }
301
303
305 return (Generation)Gen;
306 }
307
/// Returns the largest wave scratch size the current generation can express:
/// the maximum value of the WAVESIZE field multiplied by the size of one
/// field unit. The field width and unit size depend on the generation.
/// NOTE(review): each unit is given in dwords and scaled by 4, so the result
/// is presumably in bytes - confirm against callers.
308 unsigned getMaxWaveScratchSize() const {
309 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
310 if (getGeneration() >= GFX12) {
311 // 18-bit field in units of 64-dword.
312 return (64 * 4) * ((1 << 18) - 1);
313 }
314 if (getGeneration() == GFX11) {
315 // 15-bit field in units of 64-dword.
316 return (64 * 4) * ((1 << 15) - 1);
317 }
318 // All remaining generations:
318 // 13-bit field in units of 256-dword.
319 return (256 * 4) * ((1 << 13) - 1);
320 }
321
322 /// Return the number of high bits known to be zero for a frame index.
325 }
326
/// Returns the LDSBankCount field of the basic subtarget description
/// (initialized to 0 in the member declaration).
327 int getLDSBankCount() const {
328 return LDSBankCount;
329 }
330
/// Returns the maximum private element size.
/// MaxPrivateElementSize is returned when \p ForBufferRSrc is true or when
/// flat scratch is not enabled; otherwise the result is the constant 16.
/// NOTE(review): the unit is not stated here - presumably bytes, confirm.
331 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
332 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
333 }
334
335 unsigned getConstantBusLimit(unsigned Opcode) const;
336
337 /// Returns if the result of this instruction with a 16-bit result returned in
338 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
339 /// the original value.
340 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
341
/// \returns true for GFX10 and later generations.
/// NOTE(review): WGP presumably stands for workgroup processor - confirm.
342 bool supportsWGP() const { return getGeneration() >= GFX10; }
343
344 bool hasIntClamp() const {
345 return HasIntClamp;
346 }
347
348 bool hasFP64() const {
349 return FP64;
350 }
351
352 bool hasMIMG_R128() const {
353 return MIMG_R128;
354 }
355
/// Reports the FP64 feature bit. This returns the same field as hasFP64().
356 bool hasHWFP64() const {
357 return FP64;
358 }
359
360 bool hasHalfRate64Ops() const {
361 return HalfRate64Ops;
362 }
363
364 bool hasFullRate64Ops() const {
365 return FullRate64Ops;
366 }
367
368 bool hasAddr64() const {
370 }
371
372 bool hasFlat() const {
374 }
375
376 // Return true if the target only has the reverse operand versions of VALU
377 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
378 bool hasOnlyRevVALUShifts() const {
380 }
381
382 bool hasFractBug() const {
384 }
385
386 bool hasBFE() const {
387 return true;
388 }
389
390 bool hasBFI() const {
391 return true;
392 }
393
/// Forwards to hasBFE(), so BFM support always matches BFE support.
394 bool hasBFM() const {
395 return hasBFE();
396 }
397
/// Always returns true; \p Size is accepted but not consulted.
398 bool hasBCNT(unsigned Size) const {
399 return true;
400 }
401
402 bool hasFFBL() const {
403 return true;
404 }
405
406 bool hasFFBH() const {
407 return true;
408 }
409
410 bool hasMed3_16() const {
412 }
413
414 bool hasMin3Max3_16() const {
416 }
417
418 bool hasFmaMixInsts() const {
419 return HasFmaMixInsts;
420 }
421
422 bool hasCARRY() const {
423 return true;
424 }
425
426 bool hasFMA() const {
427 return FMA;
428 }
429
430 bool hasSwap() const {
431 return GFX9Insts;
432 }
433
434 bool hasScalarPackInsts() const {
435 return GFX9Insts;
436 }
437
438 bool hasScalarMulHiInsts() const {
439 return GFX9Insts;
440 }
441
442 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
443
446 }
447
449 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
450 return getGeneration() >= GFX9;
451 }
452
453 /// True if the offset field of DS instructions works as expected. On SI, the
454 /// offset uses a 16-bit adder and does not always wrap properly.
455 bool hasUsableDSOffset() const {
456 return getGeneration() >= SEA_ISLANDS;
457 }
458
461 }
462
463 /// Condition output from div_scale is usable.
466 }
467
468 /// Extra wait hazard is needed in some cases before
469 /// s_cbranch_vccnz/s_cbranch_vccz.
470 bool hasReadVCCZBug() const {
471 return getGeneration() <= SEA_ISLANDS;
472 }
473
474 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
476 return getGeneration() >= GFX10;
477 }
478
479 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
480 /// was written by a VALU instruction.
483 }
484
485 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
486 /// SGPR was written by a VALU Instruction.
489 }
490
491 bool hasRFEHazards() const {
493 }
494
495 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
/// The value is 1 for generations up to and including SEA_ISLANDS and 2 for
/// every later generation.
496 unsigned getSetRegWaitStates() const {
497 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
498 }
499
500 bool dumpCode() const {
501 return DumpCode;
502 }
503
504 /// Return the amount of LDS that can be used that will not restrict the
505 /// occupancy lower than WaveCount.
506 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
507 const Function &) const;
508
511 }
512
513 /// \returns If target supports S_DENORM_MODE.
514 bool hasDenormModeInst() const {
516 }
517
518 bool useFlatForGlobal() const {
519 return FlatForGlobal;
520 }
521
522 /// \returns If target supports ds_read/write_b128 and user enables generation
523 /// of ds_read/write_b128.
524 bool useDS128() const {
525 return CIInsts && EnableDS128;
526 }
527
528 /// \return If target supports ds_read/write_b96/128.
529 bool hasDS96AndDS128() const {
530 return CIInsts;
531 }
532
533 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
534 bool haveRoundOpsF64() const {
535 return CIInsts;
536 }
537
538 /// \returns If MUBUF instructions always perform range checking, even for
539 /// buffer resources used for private memory access.
542 }
543
544 /// \returns If target requires PRT Strict NULL support (zero result registers
545 /// for sparse texture support).
546 bool usePRTStrictNull() const {
547 return EnablePRTStrictNull;
548 }
549
552 }
553
554 /// \returns true if the target supports backing off of s_barrier instructions
555 /// when an exception is raised.
557 return BackOffBarrier;
558 }
559
562 }
563
566 }
567
568 bool hasUnalignedDSAccess() const {
569 return UnalignedDSAccess;
570 }
571
574 }
575
578 }
579
581 return UnalignedAccessMode;
582 }
583
584 bool hasApertureRegs() const {
585 return HasApertureRegs;
586 }
587
588 bool isTrapHandlerEnabled() const {
589 return TrapHandler;
590 }
591
/// Returns TargetID.isXnackOnOrAny(). The XNACK state is queried through
/// TargetID rather than by reading the EnableXNACK member directly.
592 bool isXNACKEnabled() const {
593 return TargetID.isXnackOnOrAny();
594 }
595
596 bool isTgSplitEnabled() const {
597 return EnableTgSplit;
598 }
599
600 bool isCuModeEnabled() const {
601 return EnableCuMode;
602 }
603
605
606 bool hasFlatAddressSpace() const {
607 return FlatAddressSpace;
608 }
609
/// Forwards to hasFlatAddressSpace(): the result is true exactly when the
/// FlatAddressSpace feature is set.
610 bool hasFlatScrRegister() const {
611 return hasFlatAddressSpace();
612 }
613
614 bool hasFlatInstOffsets() const {
615 return FlatInstOffsets;
616 }
617
618 bool hasFlatGlobalInsts() const {
619 return FlatGlobalInsts;
620 }
621
622 bool hasFlatScratchInsts() const {
623 return FlatScratchInsts;
624 }
625
626 // Check if target supports ST addressing mode with FLAT scratch instructions.
627 // The ST addressing mode means no registers are used, either VGPR or SGPR,
628 // but only immediate offset is swizzled and added to the FLAT scratch base.
629 bool hasFlatScratchSTMode() const {
631 }
632
633 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
634
637 }
638
639 bool enableFlatScratch() const {
640 return flatScratchIsArchitected() ||
642 }
643
644 bool hasGlobalAddTidInsts() const {
645 return GFX10_BEncoding;
646 }
647
648 bool hasAtomicCSub() const {
649 return GFX10_BEncoding;
650 }
651
652 // BUFFER/FLAT/GLOBAL_ATOMIC_ADD/MIN/MAX_F64
654
/// \returns true unless the subtarget has the GFX940 instructions.
655 bool hasExportInsts() const {
656 return !hasGFX940Insts();
657 }
658
659 bool hasVINTERPEncoding() const {
660 return GFX11Insts;
661 }
662
663 // DS_ADD_F64/DS_ADD_RTN_F64
664 bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); }
665
667 return getGeneration() >= GFX9;
668 }
669
672 }
673
675 return getGeneration() > GFX9;
676 }
677
678 bool hasD16LoadStore() const {
679 return getGeneration() >= GFX9;
680 }
681
684 }
685
686 bool hasD16Images() const {
688 }
689
690 /// Return if most LDS instructions have an m0 use that require m0 to be
691 /// initialized.
692 bool ldsRequiresM0Init() const {
693 return getGeneration() < GFX9;
694 }
695
696 // True if the hardware rewinds and replays GWS operations if a wave is
697 // preempted.
698 //
699 // If this is false, a GWS operation requires testing if a nack set the
700 // MEM_VIOL bit, and repeating if so.
701 bool hasGWSAutoReplay() const {
702 return getGeneration() >= GFX9;
703 }
704
705 /// \returns if target has ds_gws_sema_release_all instruction.
706 bool hasGWSSemaReleaseAll() const {
707 return CIInsts;
708 }
709
710 /// \returns true if the target has integer add/sub instructions that do not
711 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
712 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
713 /// for saturation.
714 bool hasAddNoCarry() const {
715 return AddNoCarryInsts;
716 }
717
718 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
719
720 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
721
722 bool hasUnpackedD16VMem() const {
723 return HasUnpackedD16VMem;
724 }
725
726 // Covers VS/PS/CS graphics shaders
// True when isMesa3DOS() holds and AMDGPU::isShader() accepts the calling
// convention of \p F.
727 bool isMesaGfxShader(const Function &F) const {
728 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
729 }
730
731 bool hasMad64_32() const {
732 return getGeneration() >= SEA_ISLANDS;
733 }
734
735 bool hasSDWAOmod() const {
736 return HasSDWAOmod;
737 }
738
739 bool hasSDWAScalar() const {
740 return HasSDWAScalar;
741 }
742
743 bool hasSDWASdst() const {
744 return HasSDWASdst;
745 }
746
747 bool hasSDWAMac() const {
748 return HasSDWAMac;
749 }
750
751 bool hasSDWAOutModsVOPC() const {
752 return HasSDWAOutModsVOPC;
753 }
754
755 bool hasDLInsts() const {
756 return HasDLInsts;
757 }
758
759 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
760
761 bool hasDot1Insts() const {
762 return HasDot1Insts;
763 }
764
765 bool hasDot2Insts() const {
766 return HasDot2Insts;
767 }
768
769 bool hasDot3Insts() const {
770 return HasDot3Insts;
771 }
772
773 bool hasDot4Insts() const {
774 return HasDot4Insts;
775 }
776
777 bool hasDot5Insts() const {
778 return HasDot5Insts;
779 }
780
781 bool hasDot6Insts() const {
782 return HasDot6Insts;
783 }
784
785 bool hasDot7Insts() const {
786 return HasDot7Insts;
787 }
788
789 bool hasDot8Insts() const {
790 return HasDot8Insts;
791 }
792
793 bool hasDot9Insts() const {
794 return HasDot9Insts;
795 }
796
797 bool hasDot10Insts() const {
798 return HasDot10Insts;
799 }
800
801 bool hasDot11Insts() const {
802 return HasDot11Insts;
803 }
804
805 bool hasMAIInsts() const {
806 return HasMAIInsts;
807 }
808
809 bool hasFP8Insts() const {
810 return HasFP8Insts;
811 }
812
814
815 bool hasPkFmacF16Inst() const {
816 return HasPkFmacF16Inst;
817 }
818
820
822
823 bool hasAtomicFaddInsts() const {
825 }
826
828
830
833 }
834
837 }
838
841 }
842
844
846
849 }
850
851 bool hasNoSdstCMPX() const {
852 return HasNoSdstCMPX;
853 }
854
855 bool hasVscnt() const {
856 return HasVscnt;
857 }
858
859 bool hasGetWaveIdInst() const {
860 return HasGetWaveIdInst;
861 }
862
863 bool hasSMemTimeInst() const {
864 return HasSMemTimeInst;
865 }
866
869 }
870
873 }
874
875 bool hasVOP3Literal() const {
876 return HasVOP3Literal;
877 }
878
879 bool hasNoDataDepHazard() const {
880 return HasNoDataDepHazard;
881 }
882
884 return getGeneration() < SEA_ISLANDS;
885 }
886
/// \returns true only for the GFX10 and GFX11 generations.
887 bool hasInstPrefetch() const {
888 return getGeneration() == GFX10 || getGeneration() == GFX11;
889 }
890
891 bool hasPrefetch() const { return GFX12Insts; }
892
893 // Has s_cmpk_* instructions.
894 bool hasSCmpK() const { return getGeneration() < GFX12; }
895
896 // Scratch is allocated in 256 dword per wave blocks for the entire
897 // wavefront. When viewed from the perspective of an arbitrary workitem, this
898 // is 4-byte aligned.
899 //
900 // Only 4-byte alignment is really needed to access anything. Transformations
901 // on the pointer value itself may rely on the alignment / known low bits of
902 // the pointer. Set this to something above the minimum to avoid needing
903 // dynamic realignment in common cases.
904 Align getStackAlignment() const { return Align(16); }
905
906 bool enableMachineScheduler() const override {
907 return true;
908 }
909
910 bool useAA() const override;
911
912 bool enableSubRegLiveness() const override {
913 return true;
914 }
915
918
919 // static wrappers
920 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
921
922 // XXX - Why is this here if it isn't in the default pass set?
923 bool enableEarlyIfConversion() const override {
924 return true;
925 }
926
928 unsigned NumRegionInstrs) const override;
929
930 void mirFileLoaded(MachineFunction &MF) const override;
931
932 unsigned getMaxNumUserSGPRs() const {
933 return AMDGPU::getMaxNumUserSGPRs(*this);
934 }
935
936 bool hasSMemRealTime() const {
937 return HasSMemRealTime;
938 }
939
940 bool hasMovrel() const {
941 return HasMovrel;
942 }
943
944 bool hasVGPRIndexMode() const {
945 return HasVGPRIndexMode;
946 }
947
948 bool useVGPRIndexMode() const;
949
950 bool hasScalarCompareEq64() const {
952 }
953
955
956 bool hasScalarStores() const {
957 return HasScalarStores;
958 }
959
960 bool hasScalarAtomics() const {
961 return HasScalarAtomics;
962 }
963
964 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
965 bool hasLDSFPAtomicAddF64() const { return GFX90AInsts; }
966
967 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
968 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
969
970 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
971 bool hasPermLane64() const { return getGeneration() >= GFX11; }
972
973 bool hasDPP() const {
974 return HasDPP;
975 }
976
/// \returns true if the HasDPP feature is set and the generation is older
/// than GFX10.
977 bool hasDPPBroadcasts() const {
978 return HasDPP && getGeneration() < GFX10;
979 }
980
982 return HasDPP && getGeneration() < GFX10;
983 }
984
985 bool hasDPP8() const {
986 return HasDPP8;
987 }
988
989 bool hasDPALU_DPP() const {
990 return HasDPALU_DPP;
991 }
992
993 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
994
995 bool hasPackedFP32Ops() const {
996 return HasPackedFP32Ops;
997 }
998
999 // Has V_PK_MOV_B32 opcode
1000 bool hasPkMovB32() const {
1001 return GFX90AInsts;
1002 }
1003
1005 return getGeneration() >= GFX10 || hasGFX940Insts();
1006 }
1007
1008 bool hasImageInsts() const {
1009 return HasImageInsts;
1010 }
1011
1013 return HasExtendedImageInsts;
1014 }
1015
1016 bool hasR128A16() const {
1017 return HasR128A16;
1018 }
1019
1020 bool hasA16() const { return HasA16; }
1021
1022 bool hasG16() const { return HasG16; }
1023
1024 bool hasOffset3fBug() const {
1025 return HasOffset3fBug;
1026 }
1027
1029
1031
1032 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1033
1035
1037
1038 bool hasNSAEncoding() const { return HasNSAEncoding; }
1039
1040 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1041
1043
1044 unsigned getNSAMaxSize(bool HasSampler = false) const {
1045 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1046 }
1047
1048 bool hasGFX10_AEncoding() const {
1049 return GFX10_AEncoding;
1050 }
1051
1052 bool hasGFX10_BEncoding() const {
1053 return GFX10_BEncoding;
1054 }
1055
1056 bool hasGFX10_3Insts() const {
1057 return GFX10_3Insts;
1058 }
1059
1060 bool hasMadF16() const;
1061
1062 bool hasMovB64() const { return GFX940Insts; }
1063
1064 bool hasLshlAddB64() const { return GFX940Insts; }
1065
1066 bool enableSIScheduler() const {
1067 return EnableSIScheduler;
1068 }
1069
1070 bool loadStoreOptEnabled() const {
1071 return EnableLoadStoreOpt;
1072 }
1073
1074 bool hasSGPRInitBug() const {
1075 return SGPRInitBug;
1076 }
1077
1079 return UserSGPRInit16Bug && isWave32();
1080 }
1081
1083
1086 }
1087
1090 }
1091
1094 }
1095
1096 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1098 return CIInsts;
1099 }
1100
1103 }
1104
1108 }
1109
1112 }
1113
1116 }
1117
1120 }
1121
1124 }
1125
1128 }
1129
/// \returns true if the LDSMisalignedBug feature is set and CU mode is not
/// enabled; the bug is not reported while EnableCuMode is true.
1130 bool hasLDSMisalignedBug() const {
1131 return LDSMisalignedBug && !EnableCuMode;
1132 }
1133
1135 return HasInstFwdPrefetchBug;
1136 }
1137
1139 return HasVcmpxExecWARHazard;
1140 }
1141
1144 }
1145
1146 // Shift amount of a 64 bit shift cannot be the highest allocated register
1147 // if it is also at the end of the allocation block.
1149 return GFX90AInsts && !GFX940Insts;
1150 }
1151
1152 // Has one cycle hazard on transcendental instruction feeding a
1153 // non transcendental VALU.
1154 bool hasTransForwardingHazard() const { return GFX940Insts; }
1155
1156 // Has one cycle hazard on a VALU instruction partially writing dst with
1157 // a shift of result bits feeding another VALU instruction.
1159
1160 // Cannot use op_sel with v_dot instructions.
1161 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1162
1163 // Does not have HW interlocks for VALU writing and then reading SGPRs.
1164 bool hasVDecCoExecHazard() const {
1165 return GFX940Insts;
1166 }
1167
1168 bool hasNSAtoVMEMBug() const {
1169 return HasNSAtoVMEMBug;
1170 }
1171
1172 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1173
/// \returns true if S_CLAUSE is supported, i.e. MaxHardClauseLength is
/// non-zero (a value of 0 indicates a lack of S_CLAUSE support).
1174 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1175
1176 bool hasGFX90AInsts() const { return GFX90AInsts; }
1177
1179 return getGeneration() == GFX10;
1180 }
1181
1182 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1183
1184 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1185
1186 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1187
1189 return getGeneration() == GFX11;
1190 }
1191
1193
1195
1196 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1197
1198 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1199
1200 /// Return if operations acting on VGPR tuples require even alignment.
1201 bool needsAlignedVGPRs() const { return GFX90AInsts; }
1202
1203 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1204 bool hasSPackHL() const { return GFX11Insts; }
1205
1206 /// Return true if the target's EXP instruction has the COMPR flag, which
1207 /// affects the meaning of the EN (enable) bits.
1208 bool hasCompressedExport() const { return !GFX11Insts; }
1209
1210 /// Return true if the target's EXP instruction supports the NULL export
1211 /// target.
1212 bool hasNullExportTarget() const { return !GFX11Insts; }
1213
1214 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1215
1216 bool hasVOPDInsts() const { return HasVOPDInsts; }
1217
1219
1220 /// Return true if the target has the S_DELAY_ALU instruction.
1221 bool hasDelayAlu() const { return GFX11Insts; }
1222
1223 bool hasPackedTID() const { return HasPackedTID; }
1224
1225 // GFX940 is a derivative of GFX90A. hasGFX940Insts() being true implies that
1226 // hasGFX90AInsts() is also true.
1227 bool hasGFX940Insts() const { return GFX940Insts; }
1228
1229 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1230
1232
1234
1236
1237 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1238 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1239 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1240
1241 /// \returns The maximum number of instructions that can be enclosed in an
1242 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1243 /// instruction.
1244 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1245
1246 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1247 /// SGPRs
1248 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1249
1250 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1251 /// VGPRs
1252 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1253
1254 /// Return occupancy for the given function, taking the used LDS size and the
1255 /// number of registers into account if they are provided.
1256 /// Note, occupancy can be affected by the scratch allocation as well, but
1257 /// we do not have enough information to compute it.
1258 unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1259 unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1260
1261 /// \returns true if the flat_scratch register should be initialized with the
1262 /// pointer to the wave's scratch memory rather than a size and offset.
1265 }
1266
1267 /// \returns true if the flat_scratch register is initialized by the HW.
1268 /// In this case it is readonly.
1270
1271 /// \returns true if the architected SGPRs are enabled.
1273
1274 /// \returns true if Global Data Share is supported.
1275 bool hasGDS() const { return HasGDS; }
1276
1277 /// \returns true if Global Wave Sync is supported.
1278 bool hasGWS() const { return HasGWS; }
1279
1280 /// \returns true if the machine has merged shaders in which s0-s7 are
1281 /// reserved by the hardware and user SGPRs start at s8
1282 bool hasMergedShaders() const {
1283 return getGeneration() >= GFX9;
1284 }
1285
1286 // \returns true if the target supports the pre-NGG legacy geometry path.
1287 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1288
1289 // \returns true if preloading kernel arguments is supported.
1290 bool hasKernargPreload() const { return KernargPreload; }
1291
1292 // \returns true if the target has split barriers feature
1293 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1294
1295 // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
// The result is hard-coded to true; no feature bit or generation is checked.
1296 bool hasCvtFP8VOP1Bug() const { return true; }
1297
1298 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1299 // no-return form.
1301
1302 // \returns true if the target has DX10_CLAMP kernel descriptor mode bit
1303 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1304
1305 // \returns true if the target has IEEE kernel descriptor mode bit
1306 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1307
1308 // \returns true if the target has IEEE fminimum/fmaximum instructions
1309 bool hasIEEEMinMax() const { return getGeneration() >= GFX12; }
1310
1311 // \returns true if the target has WG_RR_MODE kernel descriptor mode bit
1312 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1313
1314 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1315 /// values.
1316 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1317
1318 // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1319 // of sign-extending.
1320 bool hasGetPCZeroExtension() const { return GFX12Insts; }
1321
1322 /// \returns SGPR allocation granularity supported by the subtarget.
1323 unsigned getSGPRAllocGranule() const {
1325 }
1326
1327 /// \returns SGPR encoding granularity supported by the subtarget.
1328 unsigned getSGPREncodingGranule() const {
1330 }
1331
1332 /// \returns Total number of SGPRs supported by the subtarget.
1333 unsigned getTotalNumSGPRs() const {
1335 }
1336
1337 /// \returns Addressable number of SGPRs supported by the subtarget.
1338 unsigned getAddressableNumSGPRs() const {
1340 }
1341
1342 /// \returns Minimum number of SGPRs that meets the given number of waves per
1343 /// execution unit requirement supported by the subtarget.
1344 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1345 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1346 }
1347
1348 /// \returns Maximum number of SGPRs that meets the given number of waves per
1349 /// execution unit requirement supported by the subtarget.
1350 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1351 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1352 }
1353
1354 /// \returns Reserved number of SGPRs. This is common
1355 /// utility function called by MachineFunction and
1356 /// Function variants of getReservedNumSGPRs.
1357 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1358 /// \returns Reserved number of SGPRs for given machine function \p MF.
1359 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1360
1361 /// \returns Reserved number of SGPRs for given function \p F.
1362 unsigned getReservedNumSGPRs(const Function &F) const;
1363
1364 /// \returns max num SGPRs. This is the common utility
1365 /// function called by MachineFunction and Function
1366 /// variants of getMaxNumSGPRs.
1367 unsigned getBaseMaxNumSGPRs(const Function &F,
1368 std::pair<unsigned, unsigned> WavesPerEU,
1369 unsigned PreloadedSGPRs,
1370 unsigned ReservedNumSGPRs) const;
1371
1372 /// \returns Maximum number of SGPRs that meets number of waves per execution
1373 /// unit requirement for function \p MF, or number of SGPRs explicitly
1374 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1375 ///
1376 /// \returns Value that meets number of waves per execution unit requirement
1377 /// if explicitly requested value cannot be converted to integer, violates
1378 /// subtarget's specifications, or does not meet number of waves per execution
1379 /// unit requirement.
1380 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1381
1382 /// \returns Maximum number of SGPRs that meets number of waves per execution
1383 /// unit requirement for function \p F, or number of SGPRs explicitly
1384 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1385 ///
1386 /// \returns Value that meets number of waves per execution unit requirement
1387 /// if explicitly requested value cannot be converted to integer, violates
1388 /// subtarget's specifications, or does not meet number of waves per execution
1389 /// unit requirement.
1390 unsigned getMaxNumSGPRs(const Function &F) const;
1391
1392 /// \returns VGPR allocation granularity supported by the subtarget.
1393 unsigned getVGPRAllocGranule() const {
1395 }
1396
1397 /// \returns VGPR encoding granularity supported by the subtarget.
1398 unsigned getVGPREncodingGranule() const {
1400 }
1401
1402 /// \returns Total number of VGPRs supported by the subtarget.
1403 unsigned getTotalNumVGPRs() const {
1405 }
1406
1407 /// \returns Addressable number of architectural VGPRs supported by the
1408 /// subtarget.
1411 }
1412
1413 /// \returns Addressable number of VGPRs supported by the subtarget.
1414 unsigned getAddressableNumVGPRs() const {
1416 }
1417
1418 /// \returns the minimum number of VGPRs that will prevent achieving more than
1419 /// the specified number of waves \p WavesPerEU.
1420 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
// Delegates to the AMDGPU::IsaInfo helper, passing this subtarget as its
// first argument.
1421 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1422 }
1423
1424 /// \returns the maximum number of VGPRs that can be used and still achieve
1425 /// at least the specified number of waves \p WavesPerEU.
1426 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1427 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1428 }
1429
1430 /// \returns max num VGPRs. This is the common utility function
1431 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1432 unsigned getBaseMaxNumVGPRs(const Function &F,
1433 std::pair<unsigned, unsigned> WavesPerEU) const;
1434 /// \returns Maximum number of VGPRs that meets number of waves per execution
1435 /// unit requirement for function \p F, or number of VGPRs explicitly
1436 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1437 ///
1438 /// \returns Value that meets number of waves per execution unit requirement
1439 /// if explicitly requested value cannot be converted to integer, violates
1440 /// subtarget's specifications, or does not meet number of waves per execution
1441 /// unit requirement.
1442 unsigned getMaxNumVGPRs(const Function &F) const;
1443
/// \returns Maximum number of AGPRs for function \p F. As implemented here
/// this is exactly the value of getMaxNumVGPRs(F); no separate AGPR limit is
/// computed.
1444 unsigned getMaxNumAGPRs(const Function &F) const {
1445 return getMaxNumVGPRs(F);
1446 }
1447
1448 /// \returns Maximum number of VGPRs that meets number of waves per execution
1449 /// unit requirement for function \p MF, or number of VGPRs explicitly
1450 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1451 ///
1452 /// \returns Value that meets number of waves per execution unit requirement
1453 /// if explicitly requested value cannot be converted to integer, violates
1454 /// subtarget's specifications, or does not meet number of waves per execution
1455 /// unit requirement.
1456 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1457
/// Override of a base-class hook; defined out of line. \p Mutations is taken
/// by non-const reference, so presumably the implementation appends this
/// subtarget's post-RA ScheduleDAGMutation objects to it -- confirm against
/// the definition.
1458 void getPostRAMutations(
1459 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1460 const override;
1461
1462 std::unique_ptr<ScheduleDAGMutation>
1464
/// \returns true if getWavefrontSize() reports a wavefront size of 32.
1465 bool isWave32() const {
1466 return getWavefrontSize() == 32;
1467 }
1468
/// \returns true if getWavefrontSize() reports a wavefront size of 64.
1469 bool isWave64() const {
1470 return getWavefrontSize() == 64;
1471 }
1472
1474 return getRegisterInfo()->getBoolRC();
1475 }
1476
1477 /// \returns Maximum number of work groups per compute unit supported by the
1478 /// subtarget and limited by given \p FlatWorkGroupSize.
1479 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
// Delegates to the AMDGPU::IsaInfo helper, passing this subtarget as its
// first argument.
1480 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1481 }
1482
1483 /// \returns Minimum flat work group size supported by the subtarget.
1484 unsigned getMinFlatWorkGroupSize() const override {
1486 }
1487
1488 /// \returns Maximum flat work group size supported by the subtarget.
1489 unsigned getMaxFlatWorkGroupSize() const override {
1491 }
1492
1493 /// \returns Number of waves per execution unit required to support the given
1494 /// \p FlatWorkGroupSize.
1495 unsigned
1496 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
// Delegates to the AMDGPU::IsaInfo helper, passing this subtarget as its
// first argument.
1497 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1498 }
1499
1500 /// \returns Minimum number of waves per execution unit supported by the
1501 /// subtarget.
1502 unsigned getMinWavesPerEU() const override {
1504 }
1505
1506 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1507 SDep &Dep,
1508 const TargetSchedModel *SchedModel) const override;
1509
1510 /// \returns true if it's beneficial on this subtarget for the scheduler to
1511 /// cluster stores as well as loads. Currently this is the case when
/// getGeneration() is GFX11 or later.
1512 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1513
1514 /// \returns the number of address arguments from which to enable MIMG NSA
1515 /// on supported architectures.
1516 unsigned getNSAThreshold(const MachineFunction &MF) const;
1517
1518 /// \returns true if the subtarget has a hazard requiring an "s_nop 0"
1519 /// instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1521 // Currently all targets that support the dealloc VGPRs message also require
1522 // the nop.
1523 return true;
1524 }
1525};
1526
1528public:
1529 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1530
1531 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1532
1533 bool hasDispatchPtr() const { return DispatchPtr; }
1534
1535 bool hasQueuePtr() const { return QueuePtr; }
1536
1537 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1538
1539 bool hasDispatchID() const { return DispatchID; }
1540
1541 bool hasFlatScratchInit() const { return FlatScratchInit; }
1542
1543 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1544
1545 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1546
1547 unsigned getNumFreeUserSGPRs();
1548
1549 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1550
1551 enum UserSGPRID : unsigned {
1561
1562 // Returns the size in number of SGPRs for preload user SGPR field.
1564 switch (ID) {
1566 return 2;
1568 return 4;
1569 case DispatchPtrID:
1570 return 2;
1571 case QueuePtrID:
1572 return 2;
1574 return 2;
1575 case DispatchIdID:
1576 return 2;
1577 case FlatScratchInitID:
1578 return 2;
1580 return 1;
1581 }
1582 llvm_unreachable("Unknown UserSGPRID.");
1583 }
1584
1585 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1586
1587private:
1588 const GCNSubtarget &ST;
1589
1590 // Private memory buffer
1591 // Compute directly in sgpr[0:1]
1592 // Other shaders indirect 64-bits at sgpr[0:1]
1593 bool ImplicitBufferPtr = false;
1594
1595 bool PrivateSegmentBuffer = false;
1596
1597 bool DispatchPtr = false;
1598
1599 bool QueuePtr = false;
1600
1601 bool KernargSegmentPtr = false;
1602
1603 bool DispatchID = false;
1604
1605 bool FlatScratchInit = false;
1606
1607 unsigned NumKernargPreloadSGPRs = 0;
1608
1609 unsigned NumUsedUserSGPRs = 0;
1610};
1611
1612} // end namespace llvm
1613
1614#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
const char LLVMTargetMachineRef TM
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
Definition: GCNSubtarget.h:891
bool hasFlat() const
Definition: GCNSubtarget.h:372
bool hasD16Images() const
Definition: GCNSubtarget.h:686
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:64
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:819
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:735
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:219
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:475
bool hasSwap() const
Definition: GCNSubtarget.h:430
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:815
bool hasDot2Insts() const
Definition: GCNSubtarget.h:765
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:678
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:739
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:556
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:950
bool has1_5xVGPRs() const
int getLDSBankCount() const
Definition: GCNSubtarget.h:327
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:378
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:464
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:455
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:912
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:981
unsigned getSGPRAllocGranule() const
bool hasLdsAtomicAddF64() const
Definition: GCNSubtarget.h:664
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:674
bool flatScratchIsPointer() const
bool hasSDWAMac() const
Definition: GCNSubtarget.h:747
bool hasFP8ConversionInsts() const
Definition: GCNSubtarget.h:813
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:785
bool hasApertureRegs() const
Definition: GCNSubtarget.h:584
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:66
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:459
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:614
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:883
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
Definition: GCNSubtarget.h:845
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:859
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:916
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
bool hasDLInsts() const
Definition: GCNSubtarget.h:755
bool hasExtendedImageInsts() const
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:398
bool hasMAIInsts() const
Definition: GCNSubtarget.h:805
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:622
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:666
bool hasArchitectedSGPRs() const
bool hasHWFP64() const
Definition: GCNSubtarget.h:356
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:514
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:863
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:572
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:253
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:75
bool hasDot1Insts() const
Definition: GCNSubtarget.h:761
bool hasDot3Insts() const
Definition: GCNSubtarget.h:769
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:273
bool HasVGPRSingleUseHintInsts
Definition: GCNSubtarget.h:210
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:550
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:827
unsigned getTotalNumSGPRs() const
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:298
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
bool HasShaderCyclesHiLoRegisters
Definition: GCNSubtarget.h:188
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
Definition: GCNSubtarget.h:904
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:540
bool hasScalarSubwordLoads() const
Definition: GCNSubtarget.h:442
bool hasDot11Insts() const
Definition: GCNSubtarget.h:801
bool enableFlatScratch() const
Definition: GCNSubtarget.h:639
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:560
bool hasR128A16() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasSignedScratchOffsets() const
bool HasPrivEnabledTrap2NopBug
Definition: GCNSubtarget.h:227
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:644
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:610
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
Definition: GCNSubtarget.h:971
bool requiresNopBeforeDeallocVGPRs() const
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:448
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:596
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:843
bool hasKernargPreload() const
bool hasFP8Insts() const
Definition: GCNSubtarget.h:809
unsigned getMaxNumAGPRs(const Function &F) const
unsigned getVGPRAllocGranule() const
bool hasReadM0MovRelInterpHazard() const
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:265
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
bool hasFmaakFmamkF32Insts() const
bool hasVscnt() const
Definition: GCNSubtarget.h:855
bool hasMad64_32() const
Definition: GCNSubtarget.h:731
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:277
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:117
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:524
bool hasExtendedWaitCounts() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:682
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:759
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:887
unsigned maxHardClauseLength() const
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:727
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
Definition: GCNSubtarget.h:655
bool hasDPP() const
Definition: GCNSubtarget.h:973
bool hasVINTERPEncoding() const
Definition: GCNSubtarget.h:659
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:285
bool hasLegacyGeometry() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:444
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:600
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:960
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:257
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:576
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:294
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:751
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:289
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:635
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:500
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:879
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:568
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:414
bool hasIntClamp() const
Definition: GCNSubtarget.h:344
bool hasGFX10_AEncoding() const
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:670
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:496
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:261
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:995
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:781
bool hasGFX940Insts() const
bool hasLshlAddB64() const
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:364
bool hasScalarStores() const
Definition: GCNSubtarget.h:956
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:588
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:906
bool hasLDSFPAtomicAddF64() const
Definition: GCNSubtarget.h:965
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:163
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:618
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:917
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
Definition: GCNSubtarget.h:720
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:323
bool hasShaderCyclesHiLoRegisters() const
Definition: GCNSubtarget.h:871
bool hasSDWASdst() const
Definition: GCNSubtarget.h:743
bool HasDefaultComponentBroadcast
Definition: GCNSubtarget.h:172
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:434
bool hasFFBL() const
Definition: GCNSubtarget.h:402
bool hasNSAEncoding() const
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:936
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:606
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:977
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:546
bool hasMovB64() const
bool hasInstFwdPrefetchBug() const
bool hasMed3_16() const
Definition: GCNSubtarget.h:410
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasMovrel() const
Definition: GCNSubtarget.h:940
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:821
bool hasBFI() const
Definition: GCNSubtarget.h:390
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
Definition: GCNSubtarget.h:692
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:216
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:944
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:167
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:564
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:331
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool hasFMA() const
Definition: GCNSubtarget.h:426
bool hasDot10Insts() const
Definition: GCNSubtarget.h:797
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:509
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
Definition: GCNSubtarget.h:406
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:633
bool supportsWGP() const
Definition: GCNSubtarget.h:342
bool hasG16() const
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:360
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:823
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:166
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:196
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:831
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:352
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
Definition: GCNSubtarget.h:985
bool hasDot5Insts() const
Definition: GCNSubtarget.h:777
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:932
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:829
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
Definition: GCNSubtarget.h:176
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:968
bool hasFlatScratchSVSSwizzleBug() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
Definition: GCNSubtarget.h:954
bool hasVDecCoExecHazard() const
bool hasLDSFPAtomicAddF32() const
Definition: GCNSubtarget.h:964
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:394
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:534
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
Definition: GCNSubtarget.h:789
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:438
bool hasSCmpK() const
Definition: GCNSubtarget.h:894
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:281
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:529
bool hasGWS() const
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:518
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:239
Generation getGeneration() const
Definition: GCNSubtarget.h:304
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasForceStoreSC0SC1() const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:875
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:835
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:851
bool hasBufferFlatGlobalAtomicsF64() const
Definition: GCNSubtarget.h:653
unsigned getAddressableNumVGPRs() const
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:592
bool hasScalarAddSub64() const
Definition: GCNSubtarget.h:718
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:722
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:923
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:481
bool hasRFEHazards() const
Definition: GCNSubtarget.h:491
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:487
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:629
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:706
bool hasDPALU_DPP() const
Definition: GCNSubtarget.h:989
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:839
bool hasAddr64() const
Definition: GCNSubtarget.h:368
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:215
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:169
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:580
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:470
bool isWave64() const
bool hasIEEEMinMax() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:418
bool hasCARRY() const
Definition: GCNSubtarget.h:422
bool hasPackedTID() const
bool hasFP64() const
Definition: GCNSubtarget.h:348
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:714
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:867
bool hasSALUFloatInsts() const
bool hasVGPRSingleUseHintInsts() const
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:94
bool hasFractBug() const
Definition: GCNSubtarget.h:382
bool isPreciseMemoryEnabled() const
Definition: GCNSubtarget.h:604
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool hasDPPSrc1SGPR() const
Definition: GCNSubtarget.h:993
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:308
bool hasDot4Insts() const
Definition: GCNSubtarget.h:773
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
~GCNSubtarget() override
bool hasDot9Insts() const
Definition: GCNSubtarget.h:793
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:648
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:62
bool hasDefaultComponentBroadcast() const
Definition: GCNSubtarget.h:847
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:269
bool hasBFE() const
Definition: GCNSubtarget.h:386
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:701
static unsigned getNumUserSGPRForField(UserSGPRID ID)
bool hasKernargSegmentPtr() const
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
bool hasImplicitBufferPtr() const
unsigned getNumKernargPreloadSGPRs() const
unsigned getNumUsedUserSGPRs() const
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition: ScheduleDAG.h:49
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:222
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool isShader(CallingConv::ID cc)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.