LLVM 17.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI kinds; the values are documented in the AMDGPUUsage
/// "trap-handler-abi" section.
enum class TrapHandlerAbi {
  NONE = 0,
  AMDHSA = 1,
};
44
45 enum class TrapID {
46 LLVMAMDHSATrap = 0x02,
48 };
49
50private:
51 /// GlobalISel related APIs.
52 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
53 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
54 std::unique_ptr<InstructionSelector> InstSelector;
55 std::unique_ptr<LegalizerInfo> Legalizer;
56 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
57
58protected:
59 // Basic subtarget description.
62 unsigned Gen = INVALID;
64 int LDSBankCount = 0;
66
67 // Possibly statically set by tablegen, but may want to be overridden.
68 bool FastFMAF32 = false;
69 bool FastDenormalF32 = false;
70 bool HalfRate64Ops = false;
71 bool FullRate64Ops = false;
72
73 // Dynamically set bits that enable features.
74 bool FlatForGlobal = false;
76 bool BackOffBarrier = false;
78 bool UnalignedAccessMode = false;
79 bool HasApertureRegs = false;
80 bool SupportsXNACK = false;
81
82 // This should not be used directly. 'TargetID' tracks the dynamic settings
83 // for XNACK.
84 bool EnableXNACK = false;
85
86 bool EnableTgSplit = false;
87 bool EnableCuMode = false;
88 bool TrapHandler = false;
89
90 // Used as options.
91 bool EnableLoadStoreOpt = false;
93 bool EnableSIScheduler = false;
94 bool EnableDS128 = false;
95 bool EnablePRTStrictNull = false;
96 bool DumpCode = false;
97
98 // Subtarget properties statically set by tablegen
99 bool FP64 = false;
100 bool FMA = false;
101 bool MIMG_R128 = false;
102 bool CIInsts = false;
103 bool GFX8Insts = false;
104 bool GFX9Insts = false;
105 bool GFX90AInsts = false;
106 bool GFX940Insts = false;
107 bool GFX10Insts = false;
108 bool GFX11Insts = false;
109 bool GFX10_3Insts = false;
110 bool GFX7GFX8GFX9Insts = false;
111 bool SGPRInitBug = false;
112 bool UserSGPRInit16Bug = false;
115 bool HasSMemRealTime = false;
116 bool HasIntClamp = false;
117 bool HasFmaMixInsts = false;
118 bool HasMovrel = false;
119 bool HasVGPRIndexMode = false;
120 bool HasScalarStores = false;
121 bool HasScalarAtomics = false;
122 bool HasSDWAOmod = false;
123 bool HasSDWAScalar = false;
124 bool HasSDWASdst = false;
125 bool HasSDWAMac = false;
126 bool HasSDWAOutModsVOPC = false;
127 bool HasDPP = false;
128 bool HasDPP8 = false;
129 bool Has64BitDPP = false;
130 bool HasPackedFP32Ops = false;
131 bool HasImageInsts = false;
133 bool HasR128A16 = false;
134 bool HasA16 = false;
135 bool HasG16 = false;
136 bool HasNSAEncoding = false;
138 bool GFX10_AEncoding = false;
139 bool GFX10_BEncoding = false;
140 bool HasDLInsts = false;
141 bool HasFmacF64Inst = false;
142 bool HasDot1Insts = false;
143 bool HasDot2Insts = false;
144 bool HasDot3Insts = false;
145 bool HasDot4Insts = false;
146 bool HasDot5Insts = false;
147 bool HasDot6Insts = false;
148 bool HasDot7Insts = false;
149 bool HasDot8Insts = false;
150 bool HasDot9Insts = false;
151 bool HasDot10Insts = false;
152 bool HasMAIInsts = false;
153 bool HasFP8Insts = false;
154 bool HasPkFmacF16Inst = false;
163 bool SupportsSRAMECC = false;
164
165 // This should not be used directly. 'TargetID' tracks the dynamic settings
166 // for SRAMECC.
167 bool EnableSRAMECC = false;
168
169 bool HasNoSdstCMPX = false;
170 bool HasVscnt = false;
171 bool HasGetWaveIdInst = false;
172 bool HasSMemTimeInst = false;
174 bool HasVOP3Literal = false;
175 bool HasNoDataDepHazard = false;
176 bool FlatAddressSpace = false;
177 bool FlatInstOffsets = false;
178 bool FlatGlobalInsts = false;
179 bool FlatScratchInsts = false;
182 bool EnableFlatScratch = false;
184 bool AddNoCarryInsts = false;
185 bool HasUnpackedD16VMem = false;
186 bool LDSMisalignedBug = false;
189 bool UnalignedDSAccess = false;
190 bool HasPackedTID = false;
191 bool ScalarizeGlobal = false;
192
199 bool HasNSAtoVMEMBug = false;
200 bool HasNSAClauseBug = false;
201 bool HasOffset3fBug = false;
205 bool HasGFX11FullVGPRs = false;
206 bool HasMADIntraFwdBug = false;
207 bool HasVOPDInsts = false;
209
210 // Dummy feature to use for assembler in tablegen.
211 bool FeatureDisable = false;
212
214private:
215 SIInstrInfo InstrInfo;
216 SITargetLowering TLInfo;
217 SIFrameLowering FrameLowering;
218
219public:
220 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
221 const GCNTargetMachine &TM);
222 ~GCNSubtarget() override;
223
225 StringRef GPU, StringRef FS);
226
227 const SIInstrInfo *getInstrInfo() const override {
228 return &InstrInfo;
229 }
230
231 const SIFrameLowering *getFrameLowering() const override {
232 return &FrameLowering;
233 }
234
235 const SITargetLowering *getTargetLowering() const override {
236 return &TLInfo;
237 }
238
239 const SIRegisterInfo *getRegisterInfo() const override {
240 return &InstrInfo.getRegisterInfo();
241 }
242
243 const CallLowering *getCallLowering() const override {
244 return CallLoweringInfo.get();
245 }
246
247 const InlineAsmLowering *getInlineAsmLowering() const override {
248 return InlineAsmLoweringInfo.get();
249 }
250
252 return InstSelector.get();
253 }
254
255 const LegalizerInfo *getLegalizerInfo() const override {
256 return Legalizer.get();
257 }
258
259 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
260 return RegBankInfo.get();
261 }
262
264 return TargetID;
265 }
266
267 // Nothing implemented, just prevent crashes on use.
269 return &TSInfo;
270 }
271
273 return &InstrItins;
274 }
275
277
279 return (Generation)Gen;
280 }
281
282 unsigned getMaxWaveScratchSize() const {
283 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
284 if (getGeneration() < GFX11) {
285 // 13-bit field in units of 256-dword.
286 return (256 * 4) * ((1 << 13) - 1);
287 }
288 // 15-bit field in units of 64-dword.
289 return (64 * 4) * ((1 << 15) - 1);
290 }
291
292 /// Return the number of high bits known to be zero for a frame index.
295 }
296
297 int getLDSBankCount() const {
298 return LDSBankCount;
299 }
300
301 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
302 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
303 }
304
305 unsigned getConstantBusLimit(unsigned Opcode) const;
306
307 /// Returns if the result of this instruction with a 16-bit result returned in
308 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
309 /// the original value.
310 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
311
312 bool supportsWGP() const { return getGeneration() >= GFX10; }
313
314 bool hasIntClamp() const {
315 return HasIntClamp;
316 }
317
318 bool hasFP64() const {
319 return FP64;
320 }
321
322 bool hasMIMG_R128() const {
323 return MIMG_R128;
324 }
325
326 bool hasHWFP64() const {
327 return FP64;
328 }
329
330 bool hasFastFMAF32() const {
331 return FastFMAF32;
332 }
333
334 bool hasHalfRate64Ops() const {
335 return HalfRate64Ops;
336 }
337
338 bool hasFullRate64Ops() const {
339 return FullRate64Ops;
340 }
341
342 bool hasAddr64() const {
344 }
345
346 bool hasFlat() const {
348 }
349
350 // Return true if the target only has the reverse operand versions of VALU
351 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
352 bool hasOnlyRevVALUShifts() const {
354 }
355
356 bool hasFractBug() const {
358 }
359
360 bool hasBFE() const {
361 return true;
362 }
363
364 bool hasBFI() const {
365 return true;
366 }
367
368 bool hasBFM() const {
369 return hasBFE();
370 }
371
372 bool hasBCNT(unsigned Size) const {
373 return true;
374 }
375
376 bool hasFFBL() const {
377 return true;
378 }
379
380 bool hasFFBH() const {
381 return true;
382 }
383
384 bool hasMed3_16() const {
386 }
387
388 bool hasMin3Max3_16() const {
390 }
391
392 bool hasFmaMixInsts() const {
393 return HasFmaMixInsts;
394 }
395
396 bool hasCARRY() const {
397 return true;
398 }
399
400 bool hasFMA() const {
401 return FMA;
402 }
403
404 bool hasSwap() const {
405 return GFX9Insts;
406 }
407
408 bool hasScalarPackInsts() const {
409 return GFX9Insts;
410 }
411
412 bool hasScalarMulHiInsts() const {
413 return GFX9Insts;
414 }
415
418 }
419
421 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
422 return getGeneration() >= GFX9;
423 }
424
425 /// True if the offset field of DS instructions works as expected. On SI, the
426 /// offset uses a 16-bit adder and does not always wrap properly.
427 bool hasUsableDSOffset() const {
428 return getGeneration() >= SEA_ISLANDS;
429 }
430
433 }
434
435 /// Condition output from div_scale is usable.
438 }
439
440 /// Extra wait hazard is needed in some cases before
441 /// s_cbranch_vccnz/s_cbranch_vccz.
442 bool hasReadVCCZBug() const {
443 return getGeneration() <= SEA_ISLANDS;
444 }
445
446 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
448 return getGeneration() >= GFX10;
449 }
450
451 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
452 /// was written by a VALU instruction.
455 }
456
457 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
458 /// SGPR was written by a VALU Instruction.
461 }
462
463 bool hasRFEHazards() const {
465 }
466
467 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
468 unsigned getSetRegWaitStates() const {
469 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
470 }
471
472 bool dumpCode() const {
473 return DumpCode;
474 }
475
476 /// Return the amount of LDS that can be used that will not restrict the
477 /// occupancy lower than WaveCount.
478 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
479 const Function &) const;
480
483 }
484
485 /// \returns If target supports S_DENORM_MODE.
486 bool hasDenormModeInst() const {
488 }
489
490 bool useFlatForGlobal() const {
491 return FlatForGlobal;
492 }
493
494 /// \returns If target supports ds_read/write_b128 and user enables generation
495 /// of ds_read/write_b128.
496 bool useDS128() const {
497 return CIInsts && EnableDS128;
498 }
499
500 /// \return If target supports ds_read/write_b96/128.
501 bool hasDS96AndDS128() const {
502 return CIInsts;
503 }
504
505 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
506 bool haveRoundOpsF64() const {
507 return CIInsts;
508 }
509
510 /// \returns If MUBUF instructions always perform range checking, even for
511 /// buffer resources used for private memory access.
514 }
515
516 /// \returns If target requires PRT Struct NULL support (zero result registers
517 /// for sparse texture support).
518 bool usePRTStrictNull() const {
519 return EnablePRTStrictNull;
520 }
521
524 }
525
526 /// \returns true if the target supports backing off of s_barrier instructions
527 /// when an exception is raised.
529 return BackOffBarrier;
530 }
531
534 }
535
538 }
539
540 bool hasUnalignedDSAccess() const {
541 return UnalignedDSAccess;
542 }
543
546 }
547
550 }
551
553 return UnalignedAccessMode;
554 }
555
556 bool hasApertureRegs() const {
557 return HasApertureRegs;
558 }
559
560 bool isTrapHandlerEnabled() const {
561 return TrapHandler;
562 }
563
564 bool isXNACKEnabled() const {
565 return TargetID.isXnackOnOrAny();
566 }
567
568 bool isTgSplitEnabled() const {
569 return EnableTgSplit;
570 }
571
572 bool isCuModeEnabled() const {
573 return EnableCuMode;
574 }
575
576 bool hasFlatAddressSpace() const {
577 return FlatAddressSpace;
578 }
579
580 bool hasFlatScrRegister() const {
581 return hasFlatAddressSpace();
582 }
583
584 bool hasFlatInstOffsets() const {
585 return FlatInstOffsets;
586 }
587
588 bool hasFlatGlobalInsts() const {
589 return FlatGlobalInsts;
590 }
591
592 bool hasFlatScratchInsts() const {
593 return FlatScratchInsts;
594 }
595
596 // Check if target supports ST addressing mode with FLAT scratch instructions.
597 // The ST addressing mode means no registers are used, either VGPR or SGPR,
598 // but only immediate offset is swizzled and added to the FLAT scratch base.
599 bool hasFlatScratchSTMode() const {
601 }
602
603 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
604
607 }
608
609 bool enableFlatScratch() const {
610 return flatScratchIsArchitected() ||
612 }
613
614 bool hasGlobalAddTidInsts() const {
615 return GFX10_BEncoding;
616 }
617
618 bool hasAtomicCSub() const {
619 return GFX10_BEncoding;
620 }
621
623 return getGeneration() >= GFX9;
624 }
625
628 }
629
631 return getGeneration() > GFX9;
632 }
633
634 bool hasD16LoadStore() const {
635 return getGeneration() >= GFX9;
636 }
637
640 }
641
642 bool hasD16Images() const {
644 }
645
646 /// Return if most LDS instructions have an m0 use that require m0 to be
647 /// initialized.
648 bool ldsRequiresM0Init() const {
649 return getGeneration() < GFX9;
650 }
651
652 // True if the hardware rewinds and replays GWS operations if a wave is
653 // preempted.
654 //
655 // If this is false, a GWS operation requires testing if a nack set the
656 // MEM_VIOL bit, and repeating if so.
657 bool hasGWSAutoReplay() const {
658 return getGeneration() >= GFX9;
659 }
660
661 /// \returns if target has ds_gws_sema_release_all instruction.
662 bool hasGWSSemaReleaseAll() const {
663 return CIInsts;
664 }
665
666 /// \returns true if the target has integer add/sub instructions that do not
667 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
668 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
669 /// for saturation.
670 bool hasAddNoCarry() const {
671 return AddNoCarryInsts;
672 }
673
674 bool hasUnpackedD16VMem() const {
675 return HasUnpackedD16VMem;
676 }
677
678 // Covers VS/PS/CS graphics shaders
679 bool isMesaGfxShader(const Function &F) const {
680 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
681 }
682
683 bool hasMad64_32() const {
684 return getGeneration() >= SEA_ISLANDS;
685 }
686
687 bool hasSDWAOmod() const {
688 return HasSDWAOmod;
689 }
690
691 bool hasSDWAScalar() const {
692 return HasSDWAScalar;
693 }
694
695 bool hasSDWASdst() const {
696 return HasSDWASdst;
697 }
698
699 bool hasSDWAMac() const {
700 return HasSDWAMac;
701 }
702
703 bool hasSDWAOutModsVOPC() const {
704 return HasSDWAOutModsVOPC;
705 }
706
707 bool hasDLInsts() const {
708 return HasDLInsts;
709 }
710
711 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
712
713 bool hasDot1Insts() const {
714 return HasDot1Insts;
715 }
716
717 bool hasDot2Insts() const {
718 return HasDot2Insts;
719 }
720
721 bool hasDot3Insts() const {
722 return HasDot3Insts;
723 }
724
725 bool hasDot4Insts() const {
726 return HasDot4Insts;
727 }
728
729 bool hasDot5Insts() const {
730 return HasDot5Insts;
731 }
732
733 bool hasDot6Insts() const {
734 return HasDot6Insts;
735 }
736
737 bool hasDot7Insts() const {
738 return HasDot7Insts;
739 }
740
741 bool hasDot8Insts() const {
742 return HasDot8Insts;
743 }
744
745 bool hasDot9Insts() const {
746 return HasDot9Insts;
747 }
748
749 bool hasDot10Insts() const {
750 return HasDot10Insts;
751 }
752
753 bool hasMAIInsts() const {
754 return HasMAIInsts;
755 }
756
757 bool hasFP8Insts() const {
758 return HasFP8Insts;
759 }
760
761 bool hasPkFmacF16Inst() const {
762 return HasPkFmacF16Inst;
763 }
764
766
768
769 bool hasAtomicFaddInsts() const {
771 }
772
774
776
779 }
780
783 }
784
787 }
788
790
791 bool hasNoSdstCMPX() const {
792 return HasNoSdstCMPX;
793 }
794
795 bool hasVscnt() const {
796 return HasVscnt;
797 }
798
799 bool hasGetWaveIdInst() const {
800 return HasGetWaveIdInst;
801 }
802
803 bool hasSMemTimeInst() const {
804 return HasSMemTimeInst;
805 }
806
809 }
810
811 bool hasVOP3Literal() const {
812 return HasVOP3Literal;
813 }
814
815 bool hasNoDataDepHazard() const {
816 return HasNoDataDepHazard;
817 }
818
820 return getGeneration() < SEA_ISLANDS;
821 }
822
823 bool hasInstPrefetch() const { return getGeneration() >= GFX10; }
824
825 // Scratch is allocated in 256 dword per wave blocks for the entire
826 // wavefront. When viewed from the perspective of an arbitrary workitem, this
827 // is 4-byte aligned.
828 //
829 // Only 4-byte alignment is really needed to access anything. Transformations
830 // on the pointer value itself may rely on the alignment / known low bits of
831 // the pointer. Set this to something above the minimum to avoid needing
832 // dynamic realignment in common cases.
833 Align getStackAlignment() const { return Align(16); }
834
835 bool enableMachineScheduler() const override {
836 return true;
837 }
838
839 bool useAA() const override;
840
841 bool enableSubRegLiveness() const override {
842 return true;
843 }
844
847
848 // static wrappers
849 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
850
851 // XXX - Why is this here if it isn't in the default pass set?
852 bool enableEarlyIfConversion() const override {
853 return true;
854 }
855
857 unsigned NumRegionInstrs) const override;
858
859 unsigned getMaxNumUserSGPRs() const {
860 return 16;
861 }
862
863 bool hasSMemRealTime() const {
864 return HasSMemRealTime;
865 }
866
867 bool hasMovrel() const {
868 return HasMovrel;
869 }
870
871 bool hasVGPRIndexMode() const {
872 return HasVGPRIndexMode;
873 }
874
875 bool useVGPRIndexMode() const;
876
877 bool hasScalarCompareEq64() const {
879 }
880
881 bool hasScalarStores() const {
882 return HasScalarStores;
883 }
884
885 bool hasScalarAtomics() const {
886 return HasScalarAtomics;
887 }
888
889 bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
890
891 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
892 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
893
894 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
895 bool hasPermLane64() const { return getGeneration() >= GFX11; }
896
897 bool hasDPP() const {
898 return HasDPP;
899 }
900
901 bool hasDPPBroadcasts() const {
902 return HasDPP && getGeneration() < GFX10;
903 }
904
906 return HasDPP && getGeneration() < GFX10;
907 }
908
909 bool hasDPP8() const {
910 return HasDPP8;
911 }
912
913 bool has64BitDPP() const {
914 return Has64BitDPP;
915 }
916
917 bool hasPackedFP32Ops() const {
918 return HasPackedFP32Ops;
919 }
920
922 return getGeneration() >= GFX10 || hasGFX940Insts();
923 }
924
925 bool hasImageInsts() const {
926 return HasImageInsts;
927 }
928
931 }
932
933 bool hasR128A16() const {
934 return HasR128A16;
935 }
936
937 bool hasA16() const { return HasA16; }
938
939 bool hasG16() const { return HasG16; }
940
941 bool hasOffset3fBug() const {
942 return HasOffset3fBug;
943 }
944
946
948
949 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
950
951 bool hasNSAEncoding() const { return HasNSAEncoding; }
952
954
955 unsigned getNSAMaxSize() const { return AMDGPU::getNSAMaxSize(*this); }
956
957 bool hasGFX10_AEncoding() const {
958 return GFX10_AEncoding;
959 }
960
961 bool hasGFX10_BEncoding() const {
962 return GFX10_BEncoding;
963 }
964
965 bool hasGFX10_3Insts() const {
966 return GFX10_3Insts;
967 }
968
969 bool hasMadF16() const;
970
971 bool hasMovB64() const { return GFX940Insts; }
972
973 bool hasLshlAddB64() const { return GFX940Insts; }
974
975 bool enableSIScheduler() const {
976 return EnableSIScheduler;
977 }
978
979 bool loadStoreOptEnabled() const {
980 return EnableLoadStoreOpt;
981 }
982
983 bool hasSGPRInitBug() const {
984 return SGPRInitBug;
985 }
986
987 bool hasUserSGPRInit16Bug() const {
988 return UserSGPRInit16Bug && isWave32();
989 }
990
992
995 }
996
999 }
1000
1003 }
1004
1005 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1007 return CIInsts;
1008 }
1009
1012 }
1013
1017 }
1018
1021 }
1022
1025 }
1026
1029 }
1030
1033 }
1034
1037 }
1038
1039 bool hasLDSMisalignedBug() const {
1040 return LDSMisalignedBug && !EnableCuMode;
1041 }
1042
1044 return HasInstFwdPrefetchBug;
1045 }
1046
1048 return HasVcmpxExecWARHazard;
1049 }
1050
1053 }
1054
1055 // Shift amount of a 64-bit shift cannot be the highest allocated register
1056 // if that register is also at the end of the allocation block.
1058 return GFX90AInsts && !GFX940Insts;
1059 }
1060
1061 // Has one cycle hazard on transcendental instruction feeding a
1062 // non transcendental VALU.
1063 bool hasTransForwardingHazard() const { return GFX940Insts; }
1064
1065 // Has one cycle hazard on a VALU instruction partially writing dst with
1066 // a shift of result bits feeding another VALU instruction.
1068
1069 // Cannot use op_sel with v_dot instructions.
1070 bool hasDOTOpSelHazard() const { return GFX940Insts; }
1071
1072 // Does not have HW interlocs for VALU writing and then reading SGPRs.
1073 bool hasVDecCoExecHazard() const {
1074 return GFX940Insts;
1075 }
1076
1077 bool hasNSAtoVMEMBug() const {
1078 return HasNSAtoVMEMBug;
1079 }
1080
1081 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1082
1083 bool hasHardClauses() const { return getGeneration() >= GFX10; }
1084
1085 bool hasGFX90AInsts() const { return GFX90AInsts; }
1086
1088 return getGeneration() == GFX10;
1089 }
1090
1091 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1092
1093 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1094
1096 return getGeneration() >= GFX11;
1097 }
1098
1100
1101 bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; }
1102
1103 /// Return if operations acting on VGPR tuples require even alignment.
1104 bool needsAlignedVGPRs() const { return GFX90AInsts; }
1105
1106 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1107 bool hasSPackHL() const { return GFX11Insts; }
1108
1109 /// Return true if the target's EXP instruction has the COMPR flag, which
1110 /// affects the meaning of the EN (enable) bits.
1111 bool hasCompressedExport() const { return !GFX11Insts; }
1112
1113 /// Return true if the target's EXP instruction supports the NULL export
1114 /// target.
1115 bool hasNullExportTarget() const { return !GFX11Insts; }
1116
1117 bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; }
1118
1119 bool hasVOPDInsts() const { return HasVOPDInsts; }
1120
1122
1123 /// Return true if the target has the S_DELAY_ALU instruction.
1124 bool hasDelayAlu() const { return GFX11Insts; }
1125
1126 bool hasPackedTID() const { return HasPackedTID; }
1127
1128 // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
1129 // hasGFX90AInsts is also true.
1130 bool hasGFX940Insts() const { return GFX940Insts; }
1131
1132 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1133 /// SGPRs
1134 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1135
1136 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1137 /// VGPRs
1138 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1139
1140 /// Return occupancy for the given function. The LDS size and the number of
1141 /// registers used are taken into account if provided.
1142 /// Note, occupancy can be affected by the scratch allocation as well, but
1143 /// we do not have enough information to compute it.
1144 unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1145 unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1146
1147 /// \returns true if the flat_scratch register should be initialized with the
1148 /// pointer to the wave's scratch memory rather than a size and offset.
1151 }
1152
1153 /// \returns true if the flat_scratch register is initialized by the HW.
1154 /// In this case it is readonly.
1156
1157 /// \returns true if the architected SGPRs are enabled.
1159
1160 /// \returns true if the machine has merged shaders in which s0-s7 are
1161 /// reserved by the hardware and user SGPRs start at s8
1162 bool hasMergedShaders() const {
1163 return getGeneration() >= GFX9;
1164 }
1165
1166 // \returns true if the target supports the pre-NGG legacy geometry path.
1167 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1168
1169 /// \returns SGPR allocation granularity supported by the subtarget.
1170 unsigned getSGPRAllocGranule() const {
1172 }
1173
1174 /// \returns SGPR encoding granularity supported by the subtarget.
1175 unsigned getSGPREncodingGranule() const {
1177 }
1178
1179 /// \returns Total number of SGPRs supported by the subtarget.
1180 unsigned getTotalNumSGPRs() const {
1182 }
1183
1184 /// \returns Addressable number of SGPRs supported by the subtarget.
1185 unsigned getAddressableNumSGPRs() const {
1187 }
1188
1189 /// \returns Minimum number of SGPRs that meets the given number of waves per
1190 /// execution unit requirement supported by the subtarget.
1191 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1192 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1193 }
1194
1195 /// \returns Maximum number of SGPRs that meets the given number of waves per
1196 /// execution unit requirement supported by the subtarget.
1197 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1198 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1199 }
1200
1201 /// \returns Reserved number of SGPRs. This is a common
1202 /// utility function called by MachineFunction and
1203 /// Function variants of getReservedNumSGPRs.
1204 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1205 /// \returns Reserved number of SGPRs for given machine function \p MF.
1206 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1207
1208 /// \returns Reserved number of SGPRs for given function \p F.
1209 unsigned getReservedNumSGPRs(const Function &F) const;
1210
1211 /// \returns max num SGPRs. This is the common utility
1212 /// function called by MachineFunction and Function
1213 /// variants of getMaxNumSGPRs.
1214 unsigned getBaseMaxNumSGPRs(const Function &F,
1215 std::pair<unsigned, unsigned> WavesPerEU,
1216 unsigned PreloadedSGPRs,
1217 unsigned ReservedNumSGPRs) const;
1218
1219 /// \returns Maximum number of SGPRs that meets number of waves per execution
1220 /// unit requirement for function \p MF, or number of SGPRs explicitly
1221 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1222 ///
1223 /// \returns Value that meets number of waves per execution unit requirement
1224 /// if explicitly requested value cannot be converted to integer, violates
1225 /// subtarget's specifications, or does not meet number of waves per execution
1226 /// unit requirement.
1227 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1228
1229 /// \returns Maximum number of SGPRs that meets number of waves per execution
1230 /// unit requirement for function \p F, or number of SGPRs explicitly
1231 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1232 ///
1233 /// \returns Value that meets number of waves per execution unit requirement
1234 /// if explicitly requested value cannot be converted to integer, violates
1235 /// subtarget's specifications, or does not meet number of waves per execution
1236 /// unit requirement.
1237 unsigned getMaxNumSGPRs(const Function &F) const;
1238
1239 /// \returns VGPR allocation granularity supported by the subtarget.
1240 unsigned getVGPRAllocGranule() const {
1242 }
1243
1244 /// \returns VGPR encoding granularity supported by the subtarget.
1245 unsigned getVGPREncodingGranule() const {
1247 }
1248
1249 /// \returns Total number of VGPRs supported by the subtarget.
1250 unsigned getTotalNumVGPRs() const {
1252 }
1253
1254 /// \returns Addressable number of VGPRs supported by the subtarget.
1255 unsigned getAddressableNumVGPRs() const {
1257 }
1258
1259 /// \returns the minimum number of VGPRs that will prevent achieving more than
1260 /// the specified number of waves \p WavesPerEU.
1261 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1262 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1263 }
1264
1265 /// \returns the maximum number of VGPRs that can be used and still achieved
1266 /// at least the specified number of waves \p WavesPerEU.
1267 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1268 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1269 }
1270
1271 /// \returns max num VGPRs. This is the common utility function
1272 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1273 unsigned getBaseMaxNumVGPRs(const Function &F,
1274 std::pair<unsigned, unsigned> WavesPerEU) const;
1275 /// \returns Maximum number of VGPRs that meets number of waves per execution
1276 /// unit requirement for function \p F, or number of VGPRs explicitly
1277 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1278 ///
1279 /// \returns Value that meets number of waves per execution unit requirement
1280 /// if explicitly requested value cannot be converted to integer, violates
1281 /// subtarget's specifications, or does not meet number of waves per execution
1282 /// unit requirement.
1283 unsigned getMaxNumVGPRs(const Function &F) const;
1284
1285 unsigned getMaxNumAGPRs(const Function &F) const {
1286 return getMaxNumVGPRs(F);
1287 }
1288
1289 /// \returns Maximum number of VGPRs that meets number of waves per execution
1290 /// unit requirement for function \p MF, or number of VGPRs explicitly
1291 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1292 ///
1293 /// \returns Value that meets number of waves per execution unit requirement
1294 /// if explicitly requested value cannot be converted to integer, violates
1295 /// subtarget's specifications, or does not meet number of waves per execution
1296 /// unit requirement.
1297 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1298
1299 void getPostRAMutations(
1300 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1301 const override;
1302
1303 std::unique_ptr<ScheduleDAGMutation>
1305
1306 bool isWave32() const {
1307 return getWavefrontSize() == 32;
1308 }
1309
1310 bool isWave64() const {
1311 return getWavefrontSize() == 64;
1312 }
1313
1315 return getRegisterInfo()->getBoolRC();
1316 }
1317
1318 /// \returns Maximum number of work groups per compute unit supported by the
1319 /// subtarget and limited by given \p FlatWorkGroupSize.
1320 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1321 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1322 }
1323
1324 /// \returns Minimum flat work group size supported by the subtarget.
1325 unsigned getMinFlatWorkGroupSize() const override {
1327 }
1328
1329 /// \returns Maximum flat work group size supported by the subtarget.
1330 unsigned getMaxFlatWorkGroupSize() const override {
1332 }
1333
1334 /// \returns Number of waves per execution unit required to support the given
1335 /// \p FlatWorkGroupSize.
1336 unsigned
1337 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1338 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1339 }
1340
1341 /// \returns Minimum number of waves per execution unit supported by the
1342 /// subtarget.
1343 unsigned getMinWavesPerEU() const override {
1345 }
1346
1347 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1348 SDep &Dep) const override;
1349
1350 // \returns true if it's beneficial on this subtarget for the scheduler to
1351 // cluster stores as well as loads.
1352 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1353
1354 // \returns the number of address arguments from which to enable MIMG NSA
1355 // on supported architectures.
1356 unsigned getNSAThreshold(const MachineFunction &MF) const;
1357};
1358
1359} // end namespace llvm
1360
1361#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
const char LLVMTargetMachineRef TM
return InstrInfo
unsigned UseOpIdx
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasFlat() const
Definition: GCNSubtarget.h:346
bool hasD16Images() const
Definition: GCNSubtarget.h:642
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:63
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:765
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:687
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:198
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:447
bool hasSwap() const
Definition: GCNSubtarget.h:404
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:761
bool hasDot2Insts() const
Definition: GCNSubtarget.h:717
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:634
bool hasMergedShaders() const
bool hasA16() const
Definition: GCNSubtarget.h:937
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:691
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:528
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:877
int getLDSBankCount() const
Definition: GCNSubtarget.h:297
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:352
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:945
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:436
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:427
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:979
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:841
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:905
unsigned getSGPRAllocGranule() const
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:630
bool flatScratchIsPointer() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
bool hasSDWAMac() const
Definition: GCNSubtarget.h:699
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:737
bool hasApertureRegs() const
Definition: GCNSubtarget.h:556
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:65
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:431
bool hasFPAtomicToDenormModeHazard() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:584
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:819
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:799
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:845
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
bool hasDLInsts() const
Definition: GCNSubtarget.h:707
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:929
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:372
bool hasMAIInsts() const
Definition: GCNSubtarget.h:753
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:592
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:622
bool hasArchitectedSGPRs() const
bool hasHWFP64() const
Definition: GCNSubtarget.h:326
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:486
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:997
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:803
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:544
bool hasNegativeScratchOffsetBug() const
Definition: GCNSubtarget.h:991
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:227
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:75
bool hasDot1Insts() const
Definition: GCNSubtarget.h:713
bool hasDot3Insts() const
Definition: GCNSubtarget.h:721
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
Definition: GCNSubtarget.h:949
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:247
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:522
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:773
unsigned getTotalNumSGPRs() const
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:272
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:965
Align getStackAlignment() const
Definition: GCNSubtarget.h:833
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:512
bool enableFlatScratch() const
Definition: GCNSubtarget.h:609
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:532
bool hasR128A16() const
Definition: GCNSubtarget.h:933
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:941
bool hasDwordx3LoadStores() const
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:614
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:983
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:580
bool hasPermLane64() const
Definition: GCNSubtarget.h:895
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:420
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:568
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:789
bool hasFP8Insts() const
Definition: GCNSubtarget.h:757
unsigned getMaxNumAGPRs(const Function &F) const
unsigned getVGPRAllocGranule() const
bool hasReadM0MovRelInterpHazard() const
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:239
bool has64BitDPP() const
Definition: GCNSubtarget.h:913
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasDOTOpSelHazard() const
const TargetRegisterClass * getBoolRC() const
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:921
bool hasVscnt() const
Definition: GCNSubtarget.h:795
bool hasMad64_32() const
Definition: GCNSubtarget.h:683
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:251
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:114
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:496
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:638
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:711
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:823
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:679
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
Definition: GCNSubtarget.h:987
bool hasDPP() const
Definition: GCNSubtarget.h:897
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:259
bool hasLegacyGeometry() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:416
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:572
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:885
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:231
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:548
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:268
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:703
bool hasGFX11FullVGPRs() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:263
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:605
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasLDSFPAtomicAdd() const
Definition: GCNSubtarget.h:889
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:472
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:815
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:540
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:388
bool hasIntClamp() const
Definition: GCNSubtarget.h:314
bool hasGFX10_AEncoding() const
Definition: GCNSubtarget.h:957
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:626
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:468
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:235
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:917
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:733
bool hasGFX940Insts() const
bool hasLshlAddB64() const
Definition: GCNSubtarget.h:973
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:338
bool hasScalarStores() const
Definition: GCNSubtarget.h:881
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:560
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:835
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:156
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:588
unsigned getNSAThreshold(const MachineFunction &MF) const
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:846
bool hasReadM0LdsDmaHazard() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:293
bool hasSDWASdst() const
Definition: GCNSubtarget.h:695
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:408
bool hasFFBL() const
Definition: GCNSubtarget.h:376
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:951
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:863
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:576
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:901
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:518
bool hasMovB64() const
Definition: GCNSubtarget.h:971
bool hasInstFwdPrefetchBug() const
bool hasMed3_16() const
Definition: GCNSubtarget.h:384
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasMovrel() const
Definition: GCNSubtarget.h:867
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasFastFMAF32() const
Definition: GCNSubtarget.h:330
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:767
bool hasBFI() const
Definition: GCNSubtarget.h:364
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
Definition: GCNSubtarget.h:648
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:195
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:871
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:160
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:536
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:301
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
Definition: GCNSubtarget.h:925
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:947
bool hasFMA() const
Definition: GCNSubtarget.h:400
bool hasDot10Insts() const
Definition: GCNSubtarget.h:749
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:481
bool hasNegativeUnalignedScratchOffsetBug() const
Definition: GCNSubtarget.h:993
bool hasFFBH() const
Definition: GCNSubtarget.h:380
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:603
bool supportsWGP() const
Definition: GCNSubtarget.h:312
bool hasG16() const
Definition: GCNSubtarget.h:939
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:334
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:769
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:159
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:181
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:777
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:322
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
Definition: GCNSubtarget.h:909
bool hasDot5Insts() const
Definition: GCNSubtarget.h:729
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:859
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:775
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:892
bool hasFlatScratchSVSSwizzleBug() const
bool hasVDecCoExecHazard() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:368
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:506
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
Definition: GCNSubtarget.h:741
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:412
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:255
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:501
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:490
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:961
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:213
Generation getGeneration() const
Definition: GCNSubtarget.h:278
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:811
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:781
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:791
unsigned getAddressableNumVGPRs() const
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:564
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:674
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:852
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:453
bool hasRFEHazards() const
Definition: GCNSubtarget.h:463
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:459
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:599
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:662
bool enableSIScheduler() const
Definition: GCNSubtarget.h:975
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:785
bool hasAddr64() const
Definition: GCNSubtarget.h:342
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:194
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:161
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:552
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:442
bool isWave64() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:392
bool hasCARRY() const
Definition: GCNSubtarget.h:396
bool hasPackedTID() const
unsigned getNSAMaxSize() const
Definition: GCNSubtarget.h:955
bool hasFP64() const
Definition: GCNSubtarget.h:318
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:670
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:807
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:92
bool hasFractBug() const
Definition: GCNSubtarget.h:356
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:282
bool hasDot4Insts() const
Definition: GCNSubtarget.h:725
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
Definition: GCNSubtarget.h:953
~GCNSubtarget() override
bool hasDot9Insts() const
Definition: GCNSubtarget.h:745
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:618
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:61
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:243
bool hasBFE() const
Definition: GCNSubtarget.h:360
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:657
Itinerary data supplied by a subtarget to be used by a target.
Provides the logic to select generic machine instructions.
Scheduling dependency.
Definition: ScheduleDAG.h:49
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI)
bool isShader(CallingConv::ID cc)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:245
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.