LLVM 18.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
26
27#define GET_SUBTARGETINFO_HEADER
28#include "AMDGPUGenSubtargetInfo.inc"
29
30namespace llvm {
31
32class GCNTargetMachine;
33
35 public AMDGPUSubtarget {
36public:
38
39 // The following two enums are documented at:
40 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI identifiers with explicit encodings: NONE is 0x00 and
/// AMDHSA is 0x01.
41 enum class TrapHandlerAbi {
42 NONE = 0x00,
43 AMDHSA = 0x01,
44 };
45
46 enum class TrapID {
47 LLVMAMDHSATrap = 0x02,
49 };
50
51private:
52 /// GlobalISel related APIs.
53 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
54 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
55 std::unique_ptr<InstructionSelector> InstSelector;
56 std::unique_ptr<LegalizerInfo> Legalizer;
57 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
58
59protected:
60 // Basic subtarget description.
63 unsigned Gen = INVALID;
65 int LDSBankCount = 0;
67
68 // Possibly statically set by tablegen, but may want to be overridden.
69 bool FastDenormalF32 = false;
70 bool HalfRate64Ops = false;
71 bool FullRate64Ops = false;
72
73 // Dynamically set bits that enable features.
74 bool FlatForGlobal = false;
76 bool BackOffBarrier = false;
78 bool UnalignedAccessMode = false;
79 bool HasApertureRegs = false;
80 bool SupportsXNACK = false;
81 bool KernargPreload = false;
82
83 // This should not be used directly. 'TargetID' tracks the dynamic settings
84 // for XNACK.
85 bool EnableXNACK = false;
86
87 bool EnableTgSplit = false;
88 bool EnableCuMode = false;
89 bool TrapHandler = false;
90
91 // Used as options.
92 bool EnableLoadStoreOpt = false;
94 bool EnableSIScheduler = false;
95 bool EnableDS128 = false;
96 bool EnablePRTStrictNull = false;
97 bool DumpCode = false;
98
99 // Subtarget properties statically set by tablegen
100 bool FP64 = false;
101 bool FMA = false;
102 bool MIMG_R128 = false;
103 bool CIInsts = false;
104 bool GFX8Insts = false;
105 bool GFX9Insts = false;
106 bool GFX90AInsts = false;
107 bool GFX940Insts = false;
108 bool GFX10Insts = false;
109 bool GFX11Insts = false;
110 bool GFX12Insts = false;
111 bool GFX10_3Insts = false;
112 bool GFX7GFX8GFX9Insts = false;
113 bool SGPRInitBug = false;
114 bool UserSGPRInit16Bug = false;
117 bool HasSMemRealTime = false;
118 bool HasIntClamp = false;
119 bool HasFmaMixInsts = false;
120 bool HasMovrel = false;
121 bool HasVGPRIndexMode = false;
122 bool HasScalarStores = false;
123 bool HasScalarAtomics = false;
124 bool HasSDWAOmod = false;
125 bool HasSDWAScalar = false;
126 bool HasSDWASdst = false;
127 bool HasSDWAMac = false;
128 bool HasSDWAOutModsVOPC = false;
129 bool HasDPP = false;
130 bool HasDPP8 = false;
131 bool HasDPALU_DPP = false;
132 bool HasDPPSrc1SGPR = false;
133 bool HasPackedFP32Ops = false;
134 bool HasImageInsts = false;
136 bool HasR128A16 = false;
137 bool HasA16 = false;
138 bool HasG16 = false;
139 bool HasNSAEncoding = false;
141 bool GFX10_AEncoding = false;
142 bool GFX10_BEncoding = false;
143 bool HasDLInsts = false;
144 bool HasFmacF64Inst = false;
145 bool HasDot1Insts = false;
146 bool HasDot2Insts = false;
147 bool HasDot3Insts = false;
148 bool HasDot4Insts = false;
149 bool HasDot5Insts = false;
150 bool HasDot6Insts = false;
151 bool HasDot7Insts = false;
152 bool HasDot8Insts = false;
153 bool HasDot9Insts = false;
154 bool HasDot10Insts = false;
155 bool HasMAIInsts = false;
156 bool HasFP8Insts = false;
157 bool HasPkFmacF16Inst = false;
167 bool SupportsSRAMECC = false;
168
169 // This should not be used directly. 'TargetID' tracks the dynamic settings
170 // for SRAMECC.
171 bool EnableSRAMECC = false;
172
173 bool HasNoSdstCMPX = false;
174 bool HasVscnt = false;
175 bool HasGetWaveIdInst = false;
176 bool HasSMemTimeInst = false;
178 bool HasVOP3Literal = false;
179 bool HasNoDataDepHazard = false;
180 bool FlatAddressSpace = false;
181 bool FlatInstOffsets = false;
182 bool FlatGlobalInsts = false;
183 bool FlatScratchInsts = false;
186 bool EnableFlatScratch = false;
188 bool HasGDS = false;
189 bool HasGWS = false;
190 bool AddNoCarryInsts = false;
191 bool HasUnpackedD16VMem = false;
192 bool LDSMisalignedBug = false;
195 bool UnalignedDSAccess = false;
196 bool HasPackedTID = false;
197 bool ScalarizeGlobal = false;
198 bool HasSALUFloatInsts = false;
200
207 bool HasNSAtoVMEMBug = false;
208 bool HasNSAClauseBug = false;
209 bool HasOffset3fBug = false;
214 bool HasGFX11FullVGPRs = false;
215 bool HasMADIntraFwdBug = false;
216 bool HasVOPDInsts = false;
219
220 // Dummy feature to use for assembler in tablegen.
221 bool FeatureDisable = false;
222
224private:
225 SIInstrInfo InstrInfo;
226 SITargetLowering TLInfo;
227 SIFrameLowering FrameLowering;
228
229public:
230 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
231 const GCNTargetMachine &TM);
232 ~GCNSubtarget() override;
233
235 StringRef GPU, StringRef FS);
236
/// \returns the address of this subtarget's InstrInfo member.
237 const SIInstrInfo *getInstrInfo() const override {
238 return &InstrInfo;
239 }
240
/// \returns the address of this subtarget's FrameLowering member.
241 const SIFrameLowering *getFrameLowering() const override {
242 return &FrameLowering;
243 }
244
/// \returns the address of this subtarget's TLInfo member.
245 const SITargetLowering *getTargetLowering() const override {
246 return &TLInfo;
247 }
248
/// \returns the address of the register info object exposed by
/// InstrInfo.getRegisterInfo().
249 const SIRegisterInfo *getRegisterInfo() const override {
250 return &InstrInfo.getRegisterInfo();
251 }
252
/// \returns a non-owning pointer to the object held by CallLoweringInfo; the
/// std::unique_ptr member keeps ownership.
253 const CallLowering *getCallLowering() const override {
254 return CallLoweringInfo.get();
255 }
256
/// \returns a non-owning pointer to the object held by InlineAsmLoweringInfo;
/// the std::unique_ptr member keeps ownership.
257 const InlineAsmLowering *getInlineAsmLowering() const override {
258 return InlineAsmLoweringInfo.get();
259 }
260
262 return InstSelector.get();
263 }
264
/// \returns a non-owning pointer to the object held by Legalizer; the
/// std::unique_ptr member keeps ownership.
265 const LegalizerInfo *getLegalizerInfo() const override {
266 return Legalizer.get();
267 }
268
/// \returns a non-owning pointer to the object held by RegBankInfo; the
/// std::unique_ptr member keeps ownership.
269 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
270 return RegBankInfo.get();
271 }
272
274 return TargetID;
275 }
276
277 // Nothing implemented, just prevent crashes on use.
279 return &TSInfo;
280 }
281
283 return &InstrItins;
284 }
285
287
289 return (Generation)Gen;
290 }
291
/// \returns the largest wave scratch size that the
/// COMPUTE_TMPRING_SIZE.WAVESIZE field can express for this generation: the
/// field's maximum encodable value multiplied by the size of one unit.
/// NOTE(review): the factor of 4 looks like a dword-to-byte conversion, which
/// would make the result a byte count -- confirm against callers.
292 unsigned getMaxWaveScratchSize() const {
293 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
294 if (getGeneration() < GFX11) {
295 // 13-bit field in units of 256-dword: 1024 * 8191 = 8387584.
296 return (256 * 4) * ((1 << 13) - 1);
297 }
298 // 15-bit field in units of 64-dword: 256 * 32767 = 8388352.
299 return (64 * 4) * ((1 << 15) - 1);
300 }
301
302 /// Return the number of high bits known to be zero for a frame index.
305 }
306
/// \returns the LDSBankCount member, which this class default-initializes
/// to 0.
307 int getLDSBankCount() const {
308 return LDSBankCount;
309 }
310
/// \returns MaxPrivateElementSize when \p ForBufferRSrc is true or when
/// enableFlatScratch() is false; otherwise the fixed value 16.
311 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
312 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
313 }
314
315 unsigned getConstantBusLimit(unsigned Opcode) const;
316
317 /// Returns if the result of this instruction with a 16-bit result returned in
318 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
319 /// the original value.
320 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
321
/// \returns true when the generation is GFX10 or later.
322 bool supportsWGP() const { return getGeneration() >= GFX10; }
323
324 bool hasIntClamp() const {
325 return HasIntClamp;
326 }
327
328 bool hasFP64() const {
329 return FP64;
330 }
331
332 bool hasMIMG_R128() const {
333 return MIMG_R128;
334 }
335
/// \returns the FP64 member; this is the same flag that hasFP64() reports.
336 bool hasHWFP64() const {
337 return FP64;
338 }
339
340 bool hasHalfRate64Ops() const {
341 return HalfRate64Ops;
342 }
343
344 bool hasFullRate64Ops() const {
345 return FullRate64Ops;
346 }
347
348 bool hasAddr64() const {
350 }
351
352 bool hasFlat() const {
354 }
355
356 // Return true if the target only has the reverse operand versions of VALU
357 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
358 bool hasOnlyRevVALUShifts() const {
360 }
361
362 bool hasFractBug() const {
364 }
365
366 bool hasBFE() const {
367 return true;
368 }
369
370 bool hasBFI() const {
371 return true;
372 }
373
/// \returns the result of hasBFE(), so BFM is reported exactly when BFE is.
374 bool hasBFM() const {
375 return hasBFE();
376 }
377
/// \returns true unconditionally; \p Size is not consulted.
378 bool hasBCNT(unsigned Size) const {
379 return true;
380 }
381
382 bool hasFFBL() const {
383 return true;
384 }
385
386 bool hasFFBH() const {
387 return true;
388 }
389
390 bool hasMed3_16() const {
392 }
393
394 bool hasMin3Max3_16() const {
396 }
397
398 bool hasFmaMixInsts() const {
399 return HasFmaMixInsts;
400 }
401
402 bool hasCARRY() const {
403 return true;
404 }
405
406 bool hasFMA() const {
407 return FMA;
408 }
409
410 bool hasSwap() const {
411 return GFX9Insts;
412 }
413
414 bool hasScalarPackInsts() const {
415 return GFX9Insts;
416 }
417
418 bool hasScalarMulHiInsts() const {
419 return GFX9Insts;
420 }
421
424 }
425
427 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
428 return getGeneration() >= GFX9;
429 }
430
431 /// True if the offset field of DS instructions works as expected. On SI, the
432 /// offset uses a 16-bit adder and does not always wrap properly.
/// \returns true when the generation is SEA_ISLANDS or later.
433 bool hasUsableDSOffset() const {
434 return getGeneration() >= SEA_ISLANDS;
435 }
436
439 }
440
441 /// Condition output from div_scale is usable.
444 }
445
446 /// Extra wait hazard is needed in some cases before
447 /// s_cbranch_vccnz/s_cbranch_vccz.
/// \returns true when the generation is SEA_ISLANDS or earlier.
448 bool hasReadVCCZBug() const {
449 return getGeneration() <= SEA_ISLANDS;
450 }
451
452 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
454 return getGeneration() >= GFX10;
455 }
456
457 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
458 /// was written by a VALU instruction.
461 }
462
463 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
464 /// SGPR was written by a VALU Instruction.
467 }
468
469 bool hasRFEHazards() const {
471 }
472
473 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
/// \returns 1 when the generation is SEA_ISLANDS or earlier, otherwise 2.
474 unsigned getSetRegWaitStates() const {
475 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
476 }
477
478 bool dumpCode() const {
479 return DumpCode;
480 }
481
482 /// Return the amount of LDS that can be used that will not restrict the
483 /// occupancy lower than WaveCount.
484 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
485 const Function &) const;
486
489 }
490
491 /// \returns If target supports S_DENORM_MODE.
492 bool hasDenormModeInst() const {
494 }
495
496 bool useFlatForGlobal() const {
497 return FlatForGlobal;
498 }
499
500 /// \returns true if the target supports ds_read/write_b128 (CIInsts) and the
501 /// user enables generation of ds_read/write_b128 (EnableDS128).
502 bool useDS128() const {
503 return CIInsts && EnableDS128;
504 }
505
506 /// \returns true if the target supports ds_read/write_b96/128.
/// This is reported through the CIInsts flag.
507 bool hasDS96AndDS128() const {
508 return CIInsts;
509 }
510
511 /// \returns true if the target has v_trunc_f64, v_ceil_f64 and v_rndne_f64.
/// This is reported through the CIInsts flag.
512 bool haveRoundOpsF64() const {
513 return CIInsts;
514 }
515
516 /// \returns If MUBUF instructions always perform range checking, even for
517 /// buffer resources used for private memory access.
520 }
521
522 /// \returns true if the target requires PRT Strict NULL support (zero result
523 /// registers for sparse texture support); reads the EnablePRTStrictNull option.
524 bool usePRTStrictNull() const {
525 return EnablePRTStrictNull;
526 }
527
530 }
531
532 /// \returns true if the target supports backing off of s_barrier instructions
533 /// when an exception is raised.
535 return BackOffBarrier;
536 }
537
540 }
541
544 }
545
546 bool hasUnalignedDSAccess() const {
547 return UnalignedDSAccess;
548 }
549
552 }
553
556 }
557
559 return UnalignedAccessMode;
560 }
561
562 bool hasApertureRegs() const {
563 return HasApertureRegs;
564 }
565
566 bool isTrapHandlerEnabled() const {
567 return TrapHandler;
568 }
569
/// \returns the result of TargetID.isXnackOnOrAny(). The EnableXNACK member
/// is not consulted here.
570 bool isXNACKEnabled() const {
571 return TargetID.isXnackOnOrAny();
572 }
573
574 bool isTgSplitEnabled() const {
575 return EnableTgSplit;
576 }
577
578 bool isCuModeEnabled() const {
579 return EnableCuMode;
580 }
581
582 bool hasFlatAddressSpace() const {
583 return FlatAddressSpace;
584 }
585
/// \returns the result of hasFlatAddressSpace().
586 bool hasFlatScrRegister() const {
587 return hasFlatAddressSpace();
588 }
589
590 bool hasFlatInstOffsets() const {
591 return FlatInstOffsets;
592 }
593
594 bool hasFlatGlobalInsts() const {
595 return FlatGlobalInsts;
596 }
597
598 bool hasFlatScratchInsts() const {
599 return FlatScratchInsts;
600 }
601
602 // Check if target supports ST addressing mode with FLAT scratch instructions.
603 // The ST addressing mode means no registers are used, either VGPR or SGPR,
604 // but only immediate offset is swizzled and added to the FLAT scratch base.
605 bool hasFlatScratchSTMode() const {
607 }
608
/// \returns true if either the GFX940Insts or the GFX11Insts flag is set.
609 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
610
613 }
614
615 bool enableFlatScratch() const {
616 return flatScratchIsArchitected() ||
618 }
619
/// \returns the GFX10_BEncoding flag (the same flag hasAtomicCSub() reports).
620 bool hasGlobalAddTidInsts() const {
621 return GFX10_BEncoding;
622 }
623
/// \returns the GFX10_BEncoding flag (the same flag hasGlobalAddTidInsts()
/// reports).
624 bool hasAtomicCSub() const {
625 return GFX10_BEncoding;
626 }
627
629 return getGeneration() >= GFX9;
630 }
631
634 }
635
637 return getGeneration() > GFX9;
638 }
639
640 bool hasD16LoadStore() const {
641 return getGeneration() >= GFX9;
642 }
643
646 }
647
648 bool hasD16Images() const {
650 }
651
652 /// Return true if most LDS instructions have an m0 use that requires m0 to be
653 /// initialized. This is reported for generations before GFX9.
654 bool ldsRequiresM0Init() const {
655 return getGeneration() < GFX9;
656 }
657
658 /// True if the hardware rewinds and replays GWS operations if a wave is
659 /// preempted.
660 ///
661 /// If this is false, a GWS operation requires testing if a nack set the
662 /// MEM_VIOL bit, and repeating if so.
/// \returns true when the generation is GFX9 or later.
663 bool hasGWSAutoReplay() const {
664 return getGeneration() >= GFX9;
665 }
666
667 /// \returns true if the target has the ds_gws_sema_release_all instruction.
/// This is reported through the CIInsts flag.
668 bool hasGWSSemaReleaseAll() const {
669 return CIInsts;
670 }
671
672 /// \returns true if the target has integer add/sub instructions that do not
673 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
674 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
675 /// for saturation.
676 bool hasAddNoCarry() const {
677 return AddNoCarryInsts;
678 }
679
680 bool hasUnpackedD16VMem() const {
681 return HasUnpackedD16VMem;
682 }
683
684 /// Covers VS/PS/CS graphics shaders.
/// \returns true if isMesa3DOS() holds and AMDGPU::isShader() accepts the
/// calling convention of \p F.
685 bool isMesaGfxShader(const Function &F) const {
686 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
687 }
688
/// \returns true when the generation is SEA_ISLANDS or later.
689 bool hasMad64_32() const {
690 return getGeneration() >= SEA_ISLANDS;
691 }
692
693 bool hasSDWAOmod() const {
694 return HasSDWAOmod;
695 }
696
697 bool hasSDWAScalar() const {
698 return HasSDWAScalar;
699 }
700
701 bool hasSDWASdst() const {
702 return HasSDWASdst;
703 }
704
705 bool hasSDWAMac() const {
706 return HasSDWAMac;
707 }
708
709 bool hasSDWAOutModsVOPC() const {
710 return HasSDWAOutModsVOPC;
711 }
712
713 bool hasDLInsts() const {
714 return HasDLInsts;
715 }
716
717 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
718
719 bool hasDot1Insts() const {
720 return HasDot1Insts;
721 }
722
723 bool hasDot2Insts() const {
724 return HasDot2Insts;
725 }
726
727 bool hasDot3Insts() const {
728 return HasDot3Insts;
729 }
730
731 bool hasDot4Insts() const {
732 return HasDot4Insts;
733 }
734
735 bool hasDot5Insts() const {
736 return HasDot5Insts;
737 }
738
739 bool hasDot6Insts() const {
740 return HasDot6Insts;
741 }
742
743 bool hasDot7Insts() const {
744 return HasDot7Insts;
745 }
746
747 bool hasDot8Insts() const {
748 return HasDot8Insts;
749 }
750
751 bool hasDot9Insts() const {
752 return HasDot9Insts;
753 }
754
755 bool hasDot10Insts() const {
756 return HasDot10Insts;
757 }
758
759 bool hasMAIInsts() const {
760 return HasMAIInsts;
761 }
762
763 bool hasFP8Insts() const {
764 return HasFP8Insts;
765 }
766
767 bool hasPkFmacF16Inst() const {
768 return HasPkFmacF16Inst;
769 }
770
772
774
775 bool hasAtomicFaddInsts() const {
777 }
778
780
782
785 }
786
789 }
790
793 }
794
796
797 bool hasNoSdstCMPX() const {
798 return HasNoSdstCMPX;
799 }
800
801 bool hasVscnt() const {
802 return HasVscnt;
803 }
804
805 bool hasGetWaveIdInst() const {
806 return HasGetWaveIdInst;
807 }
808
809 bool hasSMemTimeInst() const {
810 return HasSMemTimeInst;
811 }
812
815 }
816
817 bool hasVOP3Literal() const {
818 return HasVOP3Literal;
819 }
820
821 bool hasNoDataDepHazard() const {
822 return HasNoDataDepHazard;
823 }
824
826 return getGeneration() < SEA_ISLANDS;
827 }
828
/// \returns true when the generation is GFX10 or later.
829 bool hasInstPrefetch() const { return getGeneration() >= GFX10; }
830
831 // Scratch is allocated in 256 dword per wave blocks for the entire
832 // wavefront. When viewed from the perspective of an arbitrary workitem, this
833 // is 4-byte aligned.
834 //
835 // Only 4-byte alignment is really needed to access anything. Transformations
836 // on the pointer value itself may rely on the alignment / known low bits of
837 // the pointer. Set this to something above the minimum to avoid needing
838 // dynamic realignment in common cases.
//
// The value returned is the constant Align(16), independent of subtarget
// state.
839 Align getStackAlignment() const { return Align(16); }
840
/// \returns true unconditionally.
841 bool enableMachineScheduler() const override {
842 return true;
843 }
844
845 bool useAA() const override;
846
/// \returns true unconditionally.
847 bool enableSubRegLiveness() const override {
848 return true;
849 }
850
853
854 // static wrappers
855 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
856
857 // XXX - Why is this here if it isn't in the default pass set?
/// \returns true unconditionally.
858 bool enableEarlyIfConversion() const override {
859 return true;
860 }
861
863 unsigned NumRegionInstrs) const override;
864
/// \returns the value computed by AMDGPU::getMaxNumUserSGPRs() for this
/// subtarget.
865 unsigned getMaxNumUserSGPRs() const {
866 return AMDGPU::getMaxNumUserSGPRs(*this);
867 }
868
869 bool hasSMemRealTime() const {
870 return HasSMemRealTime;
871 }
872
873 bool hasMovrel() const {
874 return HasMovrel;
875 }
876
877 bool hasVGPRIndexMode() const {
878 return HasVGPRIndexMode;
879 }
880
881 bool useVGPRIndexMode() const;
882
883 bool hasScalarCompareEq64() const {
885 }
886
887 bool hasScalarStores() const {
888 return HasScalarStores;
889 }
890
891 bool hasScalarAtomics() const {
892 return HasScalarAtomics;
893 }
894
/// \returns the GFX8Insts flag.
895 bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
896
897 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
898 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
899
900 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
901 bool hasPermLane64() const { return getGeneration() >= GFX11; }
902
903 bool hasDPP() const {
904 return HasDPP;
905 }
906
/// \returns true if HasDPP is set and the generation is earlier than GFX10.
907 bool hasDPPBroadcasts() const {
908 return HasDPP && getGeneration() < GFX10;
909 }
910
912 return HasDPP && getGeneration() < GFX10;
913 }
914
915 bool hasDPP8() const {
916 return HasDPP8;
917 }
918
919 bool hasDPALU_DPP() const {
920 return HasDPALU_DPP;
921 }
922
923 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
924
925 bool hasPackedFP32Ops() const {
926 return HasPackedFP32Ops;
927 }
928
929 /// \returns true if the target has the V_PK_MOV_B32 opcode.
/// This is reported through the GFX90AInsts flag.
930 bool hasPkMovB32() const {
931 return GFX90AInsts;
932 }
933
935 return getGeneration() >= GFX10 || hasGFX940Insts();
936 }
937
938 bool hasImageInsts() const {
939 return HasImageInsts;
940 }
941
944 }
945
946 bool hasR128A16() const {
947 return HasR128A16;
948 }
949
950 bool hasA16() const { return HasA16; }
951
952 bool hasG16() const { return HasG16; }
953
954 bool hasOffset3fBug() const {
955 return HasOffset3fBug;
956 }
957
959
961
962 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
963
965
966 bool hasNSAEncoding() const { return HasNSAEncoding; }
967
969
/// \returns the value computed by AMDGPU::getNSAMaxSize() for this subtarget.
970 unsigned getNSAMaxSize() const { return AMDGPU::getNSAMaxSize(*this); }
971
972 bool hasGFX10_AEncoding() const {
973 return GFX10_AEncoding;
974 }
975
976 bool hasGFX10_BEncoding() const {
977 return GFX10_BEncoding;
978 }
979
980 bool hasGFX10_3Insts() const {
981 return GFX10_3Insts;
982 }
983
984 bool hasMadF16() const;
985
986 bool hasMovB64() const { return GFX940Insts; }
987
988 bool hasLshlAddB64() const { return GFX940Insts; }
989
990 bool enableSIScheduler() const {
991 return EnableSIScheduler;
992 }
993
994 bool loadStoreOptEnabled() const {
995 return EnableLoadStoreOpt;
996 }
997
998 bool hasSGPRInitBug() const {
999 return SGPRInitBug;
1000 }
1001
1003 return UserSGPRInit16Bug && isWave32();
1004 }
1005
1007
1010 }
1011
1014 }
1015
1018 }
1019
1020 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1022 return CIInsts;
1023 }
1024
1027 }
1028
1032 }
1033
1036 }
1037
1040 }
1041
1044 }
1045
1048 }
1049
1052 }
1053
/// \returns true only if the LDSMisalignedBug flag is set and EnableCuMode is
/// not set.
1054 bool hasLDSMisalignedBug() const {
1055 return LDSMisalignedBug && !EnableCuMode;
1056 }
1057
1059 return HasInstFwdPrefetchBug;
1060 }
1061
1063 return HasVcmpxExecWARHazard;
1064 }
1065
1068 }
1069
1070 // The shift amount of a 64-bit shift cannot be the highest allocated register
1071 // if it is also at the end of the allocation block.
1073 return GFX90AInsts && !GFX940Insts;
1074 }
1075
1076 // Has one cycle hazard on transcendental instruction feeding a
1077 // non transcendental VALU.
1078 bool hasTransForwardingHazard() const { return GFX940Insts; }
1079
1080 // Has one cycle hazard on a VALU instruction partially writing dst with
1081 // a shift of result bits feeding another VALU instruction.
1083
1084 // Cannot use op_sel with v_dot instructions.
1085 bool hasDOTOpSelHazard() const { return GFX940Insts; }
1086
1087 // Does not have HW interlocks for VALU writing and then reading SGPRs.
// Reported through the GFX940Insts flag.
1088 bool hasVDecCoExecHazard() const {
1089 return GFX940Insts;
1090 }
1091
1092 bool hasNSAtoVMEMBug() const {
1093 return HasNSAtoVMEMBug;
1094 }
1095
1096 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1097
1098 bool hasHardClauses() const { return getGeneration() >= GFX10; }
1099
1100 bool hasGFX90AInsts() const { return GFX90AInsts; }
1101
1103 return getGeneration() == GFX10;
1104 }
1105
1106 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1107
1108 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1109
1111 return getGeneration() >= GFX11;
1112 }
1113
1115
1117
1118 bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; }
1119
1120 /// Return if operations acting on VGPR tuples require even alignment.
1121 bool needsAlignedVGPRs() const { return GFX90AInsts; }
1122
1123 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1124 bool hasSPackHL() const { return GFX11Insts; }
1125
1126 /// Return true if the target's EXP instruction has the COMPR flag, which
1127 /// affects the meaning of the EN (enable) bits.
/// This is reported whenever the GFX11Insts flag is not set.
1128 bool hasCompressedExport() const { return !GFX11Insts; }
1129
1130 /// Return true if the target's EXP instruction supports the NULL export
1131 /// target. This is reported whenever the GFX11Insts flag is not set.
1132 bool hasNullExportTarget() const { return !GFX11Insts; }
1133
1134 bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; }
1135
1136 bool hasVOPDInsts() const { return HasVOPDInsts; }
1137
1139
1140 /// Return true if the target has the S_DELAY_ALU instruction.
1141 bool hasDelayAlu() const { return GFX11Insts; }
1142
1143 bool hasPackedTID() const { return HasPackedTID; }
1144
1145 // GFX940 is a derivative of GFX90A. hasGFX940Insts() being true implies that
1146 // hasGFX90AInsts() is also true.
1147 bool hasGFX940Insts() const { return GFX940Insts; }
1148
1149 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1150
1152
1153 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1154 /// SGPRs
1155 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1156
1157 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1158 /// VGPRs
1159 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1160
1161 /// Return occupancy for the given function. Uses the LDS size and the number
1162 /// of registers if provided.
1163 /// Note, occupancy can be affected by the scratch allocation as well, but
1164 /// we do not have enough information to compute it.
1165 unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1166 unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1167
1168 /// \returns true if the flat_scratch register should be initialized with the
1169 /// pointer to the wave's scratch memory rather than a size and offset.
1172 }
1173
1174 /// \returns true if the flat_scratch register is initialized by the HW.
1175 /// In this case it is readonly.
1177
1178 /// \returns true if the architected SGPRs are enabled.
1180
1181 /// \returns true if Global Data Share is supported.
1182 bool hasGDS() const { return HasGDS; }
1183
1184 /// \returns true if Global Wave Sync is supported.
1185 bool hasGWS() const { return HasGWS; }
1186
1187 /// \returns true if the machine has merged shaders in which s0-s7 are
1188 /// reserved by the hardware and user SGPRs start at s8. This is reported for
/// generation GFX9 and later.
1189 bool hasMergedShaders() const {
1190 return getGeneration() >= GFX9;
1191 }
1192
1193 /// \returns true if the target supports the pre-NGG legacy geometry path.
/// This is reported for generations earlier than GFX11.
1194 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1195
1196 /// \returns true if preloading kernel arguments is supported.
/// Reads the KernargPreload flag.
1197 bool hasKernargPreload() const { return KernargPreload; }
1198
1199 // \returns true if we need to generate backwards compatible code when
1200 // preloading kernel arguments.
1202 return hasKernargPreload() && !hasGFX940Insts();
1203 }
1204
1205 /// \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
/// The current implementation returns true unconditionally.
1206 bool hasCvtFP8VOP1Bug() const { return true; }
1207
1208 // \returns true if CSUB atomics support a no-return form.
1210
1211 /// \returns SGPR allocation granularity supported by the subtarget.
1212 unsigned getSGPRAllocGranule() const {
1214 }
1215
1216 /// \returns SGPR encoding granularity supported by the subtarget.
1217 unsigned getSGPREncodingGranule() const {
1219 }
1220
1221 /// \returns Total number of SGPRs supported by the subtarget.
1222 unsigned getTotalNumSGPRs() const {
1224 }
1225
1226 /// \returns Addressable number of SGPRs supported by the subtarget.
1227 unsigned getAddressableNumSGPRs() const {
1229 }
1230
1231 /// \returns Minimum number of SGPRs that meets the given number of waves per
1232 /// execution unit requirement supported by the subtarget.
1233 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1234 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1235 }
1236
1237 /// \returns Maximum number of SGPRs that meets the given number of waves per
1238 /// execution unit requirement supported by the subtarget.
1239 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1240 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1241 }
1242
1243 /// \returns Reserved number of SGPRs. This is a common
1244 /// utility function called by MachineFunction and
1245 /// Function variants of getReservedNumSGPRs.
1246 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1247 /// \returns Reserved number of SGPRs for given machine function \p MF.
1248 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1249
1250 /// \returns Reserved number of SGPRs for given function \p F.
1251 unsigned getReservedNumSGPRs(const Function &F) const;
1252
1253 /// \returns max num SGPRs. This is the common utility
1254 /// function called by MachineFunction and Function
1255 /// variants of getMaxNumSGPRs.
1256 unsigned getBaseMaxNumSGPRs(const Function &F,
1257 std::pair<unsigned, unsigned> WavesPerEU,
1258 unsigned PreloadedSGPRs,
1259 unsigned ReservedNumSGPRs) const;
1260
1261 /// \returns Maximum number of SGPRs that meets number of waves per execution
1262 /// unit requirement for function \p MF, or number of SGPRs explicitly
1263 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1264 ///
1265 /// \returns Value that meets number of waves per execution unit requirement
1266 /// if explicitly requested value cannot be converted to integer, violates
1267 /// subtarget's specifications, or does not meet number of waves per execution
1268 /// unit requirement.
1269 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1270
1271 /// \returns Maximum number of SGPRs that meets number of waves per execution
1272 /// unit requirement for function \p F, or number of SGPRs explicitly
1273 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1274 ///
1275 /// \returns Value that meets number of waves per execution unit requirement
1276 /// if explicitly requested value cannot be converted to integer, violates
1277 /// subtarget's specifications, or does not meet number of waves per execution
1278 /// unit requirement.
1279 unsigned getMaxNumSGPRs(const Function &F) const;
1280
1281 /// \returns VGPR allocation granularity supported by the subtarget.
1282 unsigned getVGPRAllocGranule() const {
1284 }
1285
1286 /// \returns VGPR encoding granularity supported by the subtarget.
1287 unsigned getVGPREncodingGranule() const {
1289 }
1290
1291 /// \returns Total number of VGPRs supported by the subtarget.
1292 unsigned getTotalNumVGPRs() const {
1294 }
1295
1296 /// \returns Addressable number of VGPRs supported by the subtarget.
1297 unsigned getAddressableNumVGPRs() const {
1299 }
1300
1301 /// \returns the minimum number of VGPRs that will prevent achieving more than
1302 /// the specified number of waves \p WavesPerEU.
1303 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1304 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1305 }
1306
1307 /// \returns the maximum number of VGPRs that can be used and still achieve
1308 /// at least the specified number of waves \p WavesPerEU. The computation is
/// delegated to AMDGPU::IsaInfo::getMaxNumVGPRs().
1309 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1310 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1311 }
1312
1313 /// \returns max num VGPRs. This is the common utility function
1314 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1315 unsigned getBaseMaxNumVGPRs(const Function &F,
1316 std::pair<unsigned, unsigned> WavesPerEU) const;
1317 /// \returns Maximum number of VGPRs that meets number of waves per execution
1318 /// unit requirement for function \p F, or number of VGPRs explicitly
1319 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1320 ///
1321 /// \returns Value that meets number of waves per execution unit requirement
1322 /// if explicitly requested value cannot be converted to integer, violates
1323 /// subtarget's specifications, or does not meet number of waves per execution
1324 /// unit requirement.
1325 unsigned getMaxNumVGPRs(const Function &F) const;
1326
/// \returns the same limit as getMaxNumVGPRs(\p F); no separate AGPR limit
/// is computed here.
1327 unsigned getMaxNumAGPRs(const Function &F) const {
1328 return getMaxNumVGPRs(F);
1329 }
1330
1331 /// \returns Maximum number of VGPRs that meets number of waves per execution
1332 /// unit requirement for function \p MF, or number of VGPRs explicitly
1333 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1334 ///
1335 /// \returns Value that meets number of waves per execution unit requirement
1336 /// if explicitly requested value cannot be converted to integer, violates
1337 /// subtarget's specifications, or does not meet number of waves per execution
1338 /// unit requirement.
1339 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1340
1341 void getPostRAMutations(
1342 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1343 const override;
1344
1345 std::unique_ptr<ScheduleDAGMutation>
1347
/// \returns true if getWavefrontSize() is exactly 32.
1348 bool isWave32() const {
1349 return getWavefrontSize() == 32;
1350 }
1351
/// \returns true if getWavefrontSize() is exactly 64.
1352 bool isWave64() const {
1353 return getWavefrontSize() == 64;
1354 }
1355
1357 return getRegisterInfo()->getBoolRC();
1358 }
1359
1360 /// \returns Maximum number of work groups per compute unit supported by the
1361 /// subtarget and limited by given \p FlatWorkGroupSize.
1362 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1363 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1364 }
1365
1366 /// \returns Minimum flat work group size supported by the subtarget.
1367 unsigned getMinFlatWorkGroupSize() const override {
1369 }
1370
1371 /// \returns Maximum flat work group size supported by the subtarget.
1372 unsigned getMaxFlatWorkGroupSize() const override {
1374 }
1375
1376 /// \returns Number of waves per execution unit required to support the given
1377 /// \p FlatWorkGroupSize.
1378 unsigned
1379 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1380 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1381 }
1382
1383 /// \returns Minimum number of waves per execution unit supported by the
1384 /// subtarget.
1385 unsigned getMinWavesPerEU() const override {
1387 }
1388
1389 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1390 SDep &Dep) const override;
1391
1392 /// \returns true if it's beneficial on this subtarget for the scheduler to
1393 /// cluster stores as well as loads. This is reported for generation GFX11
/// and later.
1394 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1395
1396 // \returns the number of address arguments from which to enable MIMG NSA
1397 // on supported architectures.
1398 unsigned getNSAThreshold(const MachineFunction &MF) const;
1399
1400 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1401 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1403 // Currently all targets that support the dealloc VGPRs message also require
1404 // the nop.
1405 return true;
1406 }
1407};
1408
1410public:
1411 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1412
1413 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1414
1415 bool hasDispatchPtr() const { return DispatchPtr; }
1416
1417 bool hasQueuePtr() const { return QueuePtr; }
1418
1419 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1420
1421 bool hasDispatchID() const { return DispatchID; }
1422
1423 bool hasFlatScratchInit() const { return FlatScratchInit; }
1424
1425 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1426
1427 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1428
1429 unsigned getNumFreeUserSGPRs();
1430
1431 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1432
1433 enum UserSGPRID : unsigned {
1443
1444 // Returns the size in number of SGPRs for preload user SGPR field.
1446 switch (ID) {
1448 return 2;
1450 return 4;
1451 case DispatchPtrID:
1452 return 2;
1453 case QueuePtrID:
1454 return 2;
1456 return 2;
1457 case DispatchIdID:
1458 return 2;
1459 case FlatScratchInitID:
1460 return 2;
1462 return 1;
1463 }
1464 llvm_unreachable("Unknown UserSGPRID.");
1465 }
1466
1467 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1468
1469private:
1470 const GCNSubtarget &ST;
1471
1472 // Private memory buffer
1473 // Compute directly in sgpr[0:1]
1474 // Other shaders indirect 64-bits at sgpr[0:1]
1475 bool ImplicitBufferPtr = false;
1476
1477 bool PrivateSegmentBuffer = false;
1478
1479 bool DispatchPtr = false;
1480
1481 bool QueuePtr = false;
1482
1483 bool KernargSegmentPtr = false;
1484
1485 bool DispatchID = false;
1486
1487 bool FlatScratchInit = false;
1488
1489 unsigned NumKernargPreloadSGPRs = 0;
1490
1491 unsigned NumUsedUserSGPRs = 0;
1492};
1493
1494} // end namespace llvm
1495
1496#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
const char LLVMTargetMachineRef TM
return InstrInfo
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
static constexpr uint32_t Opcode
Definition: aarch32.h:200
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasFlat() const
Definition: GCNSubtarget.h:352
bool hasD16Images() const
Definition: GCNSubtarget.h:648
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:64
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:771
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:693
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:206
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:453
bool hasSwap() const
Definition: GCNSubtarget.h:410
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:767
bool hasDot2Insts() const
Definition: GCNSubtarget.h:723
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:640
bool hasMergedShaders() const
bool hasA16() const
Definition: GCNSubtarget.h:950
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:697
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:534
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:883
int getLDSBankCount() const
Definition: GCNSubtarget.h:307
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:358
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:958
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:442
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:433
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:994
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:847
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:911
unsigned getSGPRAllocGranule() const
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:636
bool flatScratchIsPointer() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
bool hasSDWAMac() const
Definition: GCNSubtarget.h:705
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:743
bool hasApertureRegs() const
Definition: GCNSubtarget.h:562
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:66
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:437
bool hasFPAtomicToDenormModeHazard() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:590
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:825
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:805
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (enable) bits.
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:851
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
bool hasDLInsts() const
Definition: GCNSubtarget.h:713
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:942
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:378
bool hasMAIInsts() const
Definition: GCNSubtarget.h:759
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:598
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:628
bool hasArchitectedSGPRs() const
bool hasHWFP64() const
Definition: GCNSubtarget.h:336
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:492
bool hasMFMAInlineLiteralBug() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:809
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:550
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:237
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:75
bool hasDot1Insts() const
Definition: GCNSubtarget.h:719
bool hasDot3Insts() const
Definition: GCNSubtarget.h:727
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
Definition: GCNSubtarget.h:962
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:257
bool HasVGPRSingleUseHintInsts
Definition: GCNSubtarget.h:199
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:528
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:779
unsigned getTotalNumSGPRs() const
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:282
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
Definition: GCNSubtarget.h:930
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:980
Align getStackAlignment() const
Definition: GCNSubtarget.h:839
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:518
bool enableFlatScratch() const
Definition: GCNSubtarget.h:615
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:538
bool hasR128A16() const
Definition: GCNSubtarget.h:946
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:954
bool hasDwordx3LoadStores() const
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:620
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:998
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:586
bool hasPermLane64() const
Definition: GCNSubtarget.h:901
bool requiresNopBeforeDeallocVGPRs() const
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:426
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:574
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:795
bool hasKernargPreload() const
bool hasFP8Insts() const
Definition: GCNSubtarget.h:763
unsigned getMaxNumAGPRs(const Function &F) const
unsigned getVGPRAllocGranule() const
bool hasReadM0MovRelInterpHazard() const
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:249
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasDOTOpSelHazard() const
bool hasMSAALoadDstSelBug() const
Definition: GCNSubtarget.h:964
const TargetRegisterClass * getBoolRC() const
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:934
bool hasVscnt() const
Definition: GCNSubtarget.h:801
bool hasMad64_32() const
Definition: GCNSubtarget.h:689
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:261
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:116
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:502
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:644
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:717
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:829
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:685
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasDPP() const
Definition: GCNSubtarget.h:903
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:269
bool hasLegacyGeometry() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:422
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:578
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:891
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:241
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:554
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:278
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns true if the result of this instruction with a 16-bit result returned in a 32-bit register implicitly zeroes the high 16 bits, rather than preserving the original value.
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:709
bool hasGFX11FullVGPRs() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:273
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:611
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasLDSFPAtomicAdd() const
Definition: GCNSubtarget.h:895
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:478
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:821
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:546
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:394
bool hasIntClamp() const
Definition: GCNSubtarget.h:324
bool hasGFX10_AEncoding() const
Definition: GCNSubtarget.h:972
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:632
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:474
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:245
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:925
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:739
bool hasGFX940Insts() const
bool hasLshlAddB64() const
Definition: GCNSubtarget.h:988
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:344
bool hasScalarStores() const
Definition: GCNSubtarget.h:887
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:566
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:841
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:159
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:594
unsigned getNSAThreshold(const MachineFunction &MF) const
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:852
bool hasReadM0LdsDmaHazard() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:303
bool hasSDWASdst() const
Definition: GCNSubtarget.h:701
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:414
bool hasFFBL() const
Definition: GCNSubtarget.h:382
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:966
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:869
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:582
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:907
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:524
bool hasMovB64() const
Definition: GCNSubtarget.h:986
bool hasInstFwdPrefetchBug() const
bool hasMed3_16() const
Definition: GCNSubtarget.h:390
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasMovrel() const
Definition: GCNSubtarget.h:873
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:773
bool needsKernargPreloadBackwardsCompatibility() const
bool hasBFI() const
Definition: GCNSubtarget.h:370
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:654
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:203
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:877
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:163
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:542
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:311
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
Definition: GCNSubtarget.h:938
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:960
bool hasFMA() const
Definition: GCNSubtarget.h:406
bool hasDot10Insts() const
Definition: GCNSubtarget.h:755
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:487
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
Definition: GCNSubtarget.h:386
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:609
bool supportsWGP() const
Definition: GCNSubtarget.h:322
bool hasG16() const
Definition: GCNSubtarget.h:952
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:340
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:775
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:162
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:185
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:783
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:332
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
Definition: GCNSubtarget.h:915
bool hasDot5Insts() const
Definition: GCNSubtarget.h:735
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:865
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:781
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:898
bool hasFlatScratchSVSSwizzleBug() const
bool hasVDecCoExecHazard() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:374
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:512
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
Definition: GCNSubtarget.h:747
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:418
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:265
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:507
bool hasGWS() const
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:496
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:976
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:223
Generation getGeneration() const
Definition: GCNSubtarget.h:288
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasForceStoreSC0SC1() const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:817
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:787
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:797
unsigned getAddressableNumVGPRs() const
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:570
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:680
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:858
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by an SMRD instruction requires 4 wait states when the SGPR was written by a VALU instruction.
Definition: GCNSubtarget.h:459
bool hasRFEHazards() const
Definition: GCNSubtarget.h:469
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU instruction.
Definition: GCNSubtarget.h:465
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:605
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:668
bool hasDPALU_DPP() const
Definition: GCNSubtarget.h:919
bool enableSIScheduler() const
Definition: GCNSubtarget.h:990
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:791
bool hasAddr64() const
Definition: GCNSubtarget.h:348
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:202
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:165
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:558
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:448
bool isWave64() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:398
bool hasCARRY() const
Definition: GCNSubtarget.h:402
bool hasPackedTID() const
unsigned getNSAMaxSize() const
Definition: GCNSubtarget.h:970
bool hasFP64() const
Definition: GCNSubtarget.h:328
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:676
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:813
bool hasSALUFloatInsts() const
bool hasVGPRSingleUseHintInsts() const
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:93
bool hasFractBug() const
Definition: GCNSubtarget.h:362
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool hasDPPSrc1SGPR() const
Definition: GCNSubtarget.h:923
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:292
bool hasDot4Insts() const
Definition: GCNSubtarget.h:731
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
Definition: GCNSubtarget.h:968
~GCNSubtarget() override
bool hasDot9Insts() const
Definition: GCNSubtarget.h:751
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:624
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:62
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:253
bool hasBFE() const
Definition: GCNSubtarget.h:366
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:663
static unsigned getNumUserSGPRForField(UserSGPRID ID)
bool hasKernargSegmentPtr() const
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
bool hasImplicitBufferPtr() const
unsigned getNumKernargPreloadSGPRs() const
unsigned getNumUsedUserSGPRs() const
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition: ScheduleDAG.h:49
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
bool isShader(CallingConv::ID cc)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:280
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.