LLVM 22.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
40 enum class TrapHandlerAbi {
41 NONE = 0x00,
42 AMDHSA = 0x01,
43 };
44
45 enum class TrapID {
48 };
49
50private:
51 /// SelectionDAGISel related APIs.
52 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
53
54 /// GlobalISel related APIs.
55 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
56 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
57 std::unique_ptr<InstructionSelector> InstSelector;
58 std::unique_ptr<LegalizerInfo> Legalizer;
59 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
60
61protected:
62 // Basic subtarget description.
64 unsigned Gen = INVALID;
66 int LDSBankCount = 0;
68
69 // Possibly statically set by tablegen, but may want to be overridden.
70 bool FastDenormalF32 = false;
71 bool HalfRate64Ops = false;
72 bool FullRate64Ops = false;
73
74 // Dynamically set bits that enable features.
75 bool FlatForGlobal = false;
77 bool BackOffBarrier = false;
79 bool UnalignedAccessMode = false;
81 bool HasApertureRegs = false;
82 bool SupportsXNACK = false;
83 bool KernargPreload = false;
84
85 // This should not be used directly. 'TargetID' tracks the dynamic settings
86 // for XNACK.
87 bool EnableXNACK = false;
88
89 bool EnableTgSplit = false;
90 bool EnableCuMode = false;
91 bool TrapHandler = false;
92 bool EnablePreciseMemory = false;
93
94 // Used as options.
95 bool EnableLoadStoreOpt = false;
97 bool EnableSIScheduler = false;
98 bool EnableDS128 = false;
99 bool EnablePRTStrictNull = false;
100 bool DumpCode = false;
102
103 // Subtarget properties statically set by tablegen.
104 bool FP64 = false;
105 bool FMA = false;
106 bool MIMG_R128 = false;
107 bool CIInsts = false;
108 bool GFX8Insts = false;
109 bool GFX9Insts = false;
110 bool GFX90AInsts = false;
111 bool GFX940Insts = false;
112 bool GFX950Insts = false;
113 bool GFX10Insts = false;
114 bool GFX11Insts = false;
115 bool GFX12Insts = false;
116 bool GFX1250Insts = false;
117 bool GFX10_3Insts = false;
118 bool GFX7GFX8GFX9Insts = false;
119 bool SGPRInitBug = false;
120 bool UserSGPRInit16Bug = false;
123 bool HasSMemRealTime = false;
124 bool HasIntClamp = false;
125 bool HasFmaMixInsts = false;
126 bool HasFmaMixBF16Insts = false;
127 bool HasMovrel = false;
128 bool HasVGPRIndexMode = false;
130 bool HasScalarStores = false;
131 bool HasScalarAtomics = false;
132 bool HasSDWAOmod = false;
133 bool HasSDWAScalar = false;
134 bool HasSDWASdst = false;
135 bool HasSDWAMac = false;
136 bool HasSDWAOutModsVOPC = false;
137 bool HasDPP = false;
138 bool HasDPP8 = false;
139 bool HasDPALU_DPP = false;
140 bool HasDPPSrc1SGPR = false;
141 bool HasPackedFP32Ops = false;
142 bool HasImageInsts = false;
144 bool HasR128A16 = false;
145 bool HasA16 = false;
146 bool HasG16 = false;
147 bool HasNSAEncoding = false;
149 bool GFX10_AEncoding = false;
150 bool GFX10_BEncoding = false;
151 bool HasDLInsts = false;
152 bool HasFmacF64Inst = false;
153 bool HasDot1Insts = false;
154 bool HasDot2Insts = false;
155 bool HasDot3Insts = false;
156 bool HasDot4Insts = false;
157 bool HasDot5Insts = false;
158 bool HasDot6Insts = false;
159 bool HasDot7Insts = false;
160 bool HasDot8Insts = false;
161 bool HasDot9Insts = false;
162 bool HasDot10Insts = false;
163 bool HasDot11Insts = false;
164 bool HasDot12Insts = false;
165 bool HasDot13Insts = false;
166 bool HasMAIInsts = false;
167 bool HasFP8Insts = false;
169 bool HasCubeInsts = false;
170 bool HasLerpInst = false;
171 bool HasSadInsts = false;
172 bool HasQsadInsts = false;
173 bool HasCvtNormInsts = false;
176 bool HasFP8E5M3Insts = false;
177 bool HasCvtFP8Vop1Bug = false;
178 bool HasPkFmacF16Inst = false;
199 bool HasXF32Insts = false;
200 /// The maximum number of instructions that may be placed within an S_CLAUSE,
201 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
202 /// indicates a lack of S_CLAUSE support.
204 bool SupportsSRAMECC = false;
205 bool DynamicVGPR = false;
207 bool HasVMemToLDSLoad = false;
208 bool RequiresAlignVGPR = false;
209
210 // This should not be used directly. 'TargetID' tracks the dynamic settings
211 // for SRAMECC.
212 bool EnableSRAMECC = false;
213
214 bool HasNoSdstCMPX = false;
215 bool HasVscnt = false;
216 bool HasWaitXcnt = false;
217 bool HasGetWaveIdInst = false;
218 bool HasSMemTimeInst = false;
221 bool HasVOP3Literal = false;
222 bool HasNoDataDepHazard = false;
223 bool FlatAddressSpace = false;
224 bool FlatInstOffsets = false;
225 bool FlatGlobalInsts = false;
226 bool FlatScratchInsts = false;
227 bool FlatGVSMode = false;
230 bool EnableFlatScratch = false;
232 bool HasGDS = false;
233 bool HasGWS = false;
234 bool AddNoCarryInsts = false;
235 bool HasUnpackedD16VMem = false;
236 bool LDSMisalignedBug = false;
239 bool UnalignedDSAccess = false;
240 bool HasPackedTID = false;
241 bool ScalarizeGlobal = false;
242 bool HasSALUFloatInsts = false;
245 bool Has64BitLiterals = false;
247 bool HasBitOp3Insts = false;
248 bool HasTanhInsts = false;
251 bool HasPrngInst = false;
253 bool HasPermlane16Swap = false;
254 bool HasPermlane32Swap = false;
259 bool HasVmemPrefInsts = false;
261 bool HasSafeCUPrefetch = false;
264 bool HasNSAtoVMEMBug = false;
265 bool HasNSAClauseBug = false;
266 bool HasOffset3fBug = false;
272 bool Has1_5xVGPRs = false;
273 bool HasMADIntraFwdBug = false;
274 bool HasVOPDInsts = false;
278 bool HasAshrPkInsts = false;
282 bool HasMin3Max3PKF16 = false;
284 bool HasLshlAddU64Inst = false;
285 bool HasAddSubU64Insts = false;
286 bool HasMadU32Inst = false;
287 bool HasAddMinMaxInsts = false;
292
293 bool RequiresCOV6 = false;
296
298
299 bool HasClusters = false;
301
302 // Dummy feature to use for assembler in tablegen.
303 bool FeatureDisable = false;
304
305private:
306 SIInstrInfo InstrInfo;
307 SITargetLowering TLInfo;
308 SIFrameLowering FrameLowering;
309
310public:
311 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
312 const GCNTargetMachine &TM);
313 ~GCNSubtarget() override;
314
316 StringRef GPU, StringRef FS);
317
318 /// Diagnose inconsistent subtarget features before attempting to codegen
319 /// function \p F.
320 void checkSubtargetFeatures(const Function &F) const;
321
322 const SIInstrInfo *getInstrInfo() const override {
323 return &InstrInfo;
324 }
325
326 const SIFrameLowering *getFrameLowering() const override {
327 return &FrameLowering;
328 }
329
330 const SITargetLowering *getTargetLowering() const override {
331 return &TLInfo;
332 }
333
334 const SIRegisterInfo *getRegisterInfo() const override {
335 return &InstrInfo.getRegisterInfo();
336 }
337
338 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
339
340 const CallLowering *getCallLowering() const override {
341 return CallLoweringInfo.get();
342 }
343
344 const InlineAsmLowering *getInlineAsmLowering() const override {
345 return InlineAsmLoweringInfo.get();
346 }
347
349 return InstSelector.get();
350 }
351
352 const LegalizerInfo *getLegalizerInfo() const override {
353 return Legalizer.get();
354 }
355
356 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
357 return RegBankInfo.get();
358 }
359
361 return TargetID;
362 }
363
365 return &InstrItins;
366 }
367
369
371 return (Generation)Gen;
372 }
373
374 unsigned getMaxWaveScratchSize() const {
375 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
376 if (getGeneration() >= GFX12) {
377 // 18-bit field in units of 64-dword.
378 return (64 * 4) * ((1 << 18) - 1);
379 }
380 if (getGeneration() == GFX11) {
381 // 15-bit field in units of 64-dword.
382 return (64 * 4) * ((1 << 15) - 1);
383 }
384 // 13-bit field in units of 256-dword.
385 return (256 * 4) * ((1 << 13) - 1);
386 }
387
388 /// Return the number of high bits known to be zero for a frame index.
392
393 int getLDSBankCount() const {
394 return LDSBankCount;
395 }
396
397 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
398 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
399 }
400
401 unsigned getConstantBusLimit(unsigned Opcode) const;
402
403 /// Returns if the result of this instruction with a 16-bit result returned in
404 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
405 /// the original value.
406 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
407
408 bool supportsWGP() const {
409 if (GFX1250Insts)
410 return false;
411 return getGeneration() >= GFX10;
412 }
413
414 bool hasIntClamp() const {
415 return HasIntClamp;
416 }
417
418 bool hasFP64() const {
419 return FP64;
420 }
421
422 bool hasMIMG_R128() const {
423 return MIMG_R128;
424 }
425
426 bool hasHWFP64() const {
427 return FP64;
428 }
429
430 bool hasHalfRate64Ops() const {
431 return HalfRate64Ops;
432 }
433
434 bool hasFullRate64Ops() const {
435 return FullRate64Ops;
436 }
437
438 bool hasAddr64() const {
440 }
441
442 bool hasFlat() const {
444 }
445
446 // Return true if the target only has the reverse operand versions of VALU
447 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
448 bool hasOnlyRevVALUShifts() const {
450 }
451
452 bool hasFractBug() const {
454 }
455
456 bool hasBFE() const {
457 return true;
458 }
459
460 bool hasBFI() const {
461 return true;
462 }
463
464 bool hasBFM() const {
465 return hasBFE();
466 }
467
468 bool hasBCNT(unsigned Size) const {
469 return true;
470 }
471
472 bool hasFFBL() const {
473 return true;
474 }
475
476 bool hasFFBH() const {
477 return true;
478 }
479
480 bool hasMed3_16() const {
482 }
483
484 bool hasMin3Max3_16() const {
486 }
487
488 bool hasFmaMixInsts() const {
489 return HasFmaMixInsts;
490 }
491
492 bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; }
493
494 bool hasCARRY() const {
495 return true;
496 }
497
498 bool hasFMA() const {
499 return FMA;
500 }
501
502 bool hasSwap() const {
503 return GFX9Insts;
504 }
505
506 bool hasScalarPackInsts() const {
507 return GFX9Insts;
508 }
509
510 bool hasScalarMulHiInsts() const {
511 return GFX9Insts;
512 }
513
514 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
515
519
521 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
522 return getGeneration() >= GFX9;
523 }
524
525 /// True if the offset field of DS instructions works as expected. On SI, the
526 /// offset uses a 16-bit adder and does not always wrap properly.
527 bool hasUsableDSOffset() const {
528 return getGeneration() >= SEA_ISLANDS;
529 }
530
534
535 /// Condition output from div_scale is usable.
539
540 /// Extra wait hazard is needed in some cases before
541 /// s_cbranch_vccnz/s_cbranch_vccz.
542 bool hasReadVCCZBug() const {
543 return getGeneration() <= SEA_ISLANDS;
544 }
545
546 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
548 return getGeneration() >= GFX10;
549 }
550
551 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
552 /// was written by a VALU instruction.
555 }
556
557 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
558 /// SGPR was written by a VALU Instruction.
561 }
562
563 bool hasRFEHazards() const {
565 }
566
567 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
568 unsigned getSetRegWaitStates() const {
569 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
570 }
571
572 bool dumpCode() const {
573 return DumpCode;
574 }
575
576 /// Return the amount of LDS that can be used that will not restrict the
577 /// occupancy lower than WaveCount.
578 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
579 const Function &) const;
580
583 }
584
585 /// \returns If target supports S_DENORM_MODE.
586 bool hasDenormModeInst() const {
588 }
589
590 bool useFlatForGlobal() const {
591 return FlatForGlobal;
592 }
593
594 /// \returns If target supports ds_read/write_b128 and user enables generation
595 /// of ds_read/write_b128.
596 bool useDS128() const {
597 return CIInsts && EnableDS128;
598 }
599
600 /// \return If target supports ds_read/write_b96/128.
601 bool hasDS96AndDS128() const {
602 return CIInsts;
603 }
604
605 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
606 bool haveRoundOpsF64() const {
607 return CIInsts;
608 }
609
610 /// \returns If MUBUF instructions always perform range checking, even for
611 /// buffer resources used for private memory access.
615
616 /// \returns If target requires PRT Strict NULL support (zero result registers
617 /// for sparse texture support).
618 bool usePRTStrictNull() const {
619 return EnablePRTStrictNull;
620 }
621
625
626 /// \returns true if the target supports backing off of s_barrier instructions
627 /// when an exception is raised.
629 return BackOffBarrier;
630 }
631
634 }
635
639
640 bool hasUnalignedDSAccess() const {
641 return UnalignedDSAccess;
642 }
643
647
650 }
651
655
657 return UnalignedAccessMode;
658 }
659
661
662 bool hasApertureRegs() const {
663 return HasApertureRegs;
664 }
665
666 bool isTrapHandlerEnabled() const {
667 return TrapHandler;
668 }
669
670 bool isXNACKEnabled() const {
671 return TargetID.isXnackOnOrAny();
672 }
673
674 bool isTgSplitEnabled() const {
675 return EnableTgSplit;
676 }
677
678 bool isCuModeEnabled() const {
679 return EnableCuMode;
680 }
681
683
684 bool hasFlatAddressSpace() const {
685 return FlatAddressSpace;
686 }
687
688 bool hasFlatScrRegister() const {
689 return hasFlatAddressSpace();
690 }
691
692 bool hasFlatInstOffsets() const {
693 return FlatInstOffsets;
694 }
695
696 bool hasFlatGlobalInsts() const {
697 return FlatGlobalInsts;
698 }
699
700 bool hasFlatScratchInsts() const {
701 return FlatScratchInsts;
702 }
703
704 // Check if target supports ST addressing mode with FLAT scratch instructions.
705 // The ST addressing mode means no registers are used, either VGPR or SGPR,
706 // but only immediate offset is swizzled and added to the FLAT scratch base.
707 bool hasFlatScratchSTMode() const {
709 }
710
711 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
712
715 }
716
717 bool enableFlatScratch() const {
718 return flatScratchIsArchitected() ||
720 }
721
722 bool hasGlobalAddTidInsts() const {
723 return GFX10_BEncoding;
724 }
725
726 bool hasAtomicCSub() const {
727 return GFX10_BEncoding;
728 }
729
730 bool hasMTBUFInsts() const { return !hasGFX1250Insts(); }
731
732 bool hasFormattedMUBUFInsts() const { return !hasGFX1250Insts(); }
733
734 bool hasExportInsts() const {
735 return !hasGFX940Insts() && !hasGFX1250Insts();
736 }
737
738 bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
739
740 // DS_ADD_F64/DS_ADD_RTN_F64
741 bool hasLdsAtomicAddF64() const {
742 return hasGFX90AInsts() || hasGFX1250Insts();
743 }
744
746 return getGeneration() >= GFX9;
747 }
748
751 }
752
754 return getGeneration() > GFX9;
755 }
756
757 bool hasD16LoadStore() const {
758 return getGeneration() >= GFX9;
759 }
760
762 return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
763 }
764
765 bool hasD16Images() const {
767 }
768
769 /// Return if most LDS instructions have an m0 use that require m0 to be
770 /// initialized.
771 bool ldsRequiresM0Init() const {
772 return getGeneration() < GFX9;
773 }
774
775 // True if the hardware rewinds and replays GWS operations if a wave is
776 // preempted.
777 //
778 // If this is false, a GWS operation requires testing if a nack set the
779 // MEM_VIOL bit, and repeating if so.
780 bool hasGWSAutoReplay() const {
781 return getGeneration() >= GFX9;
782 }
783
784 /// \returns if target has ds_gws_sema_release_all instruction.
785 bool hasGWSSemaReleaseAll() const {
786 return CIInsts;
787 }
788
789 /// \returns true if the target has integer add/sub instructions that do not
790 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
791 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
792 /// for saturation.
793 bool hasAddNoCarry() const {
794 return AddNoCarryInsts;
795 }
796
797 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
798
799 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
800
801 bool hasUnpackedD16VMem() const {
802 return HasUnpackedD16VMem;
803 }
804
805 // Covers VS/PS/CS graphics shaders
806 bool isMesaGfxShader(const Function &F) const {
807 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
808 }
809
810 bool hasMad64_32() const {
811 return getGeneration() >= SEA_ISLANDS;
812 }
813
814 bool hasSDWAOmod() const {
815 return HasSDWAOmod;
816 }
817
818 bool hasSDWAScalar() const {
819 return HasSDWAScalar;
820 }
821
822 bool hasSDWASdst() const {
823 return HasSDWASdst;
824 }
825
826 bool hasSDWAMac() const {
827 return HasSDWAMac;
828 }
829
830 bool hasSDWAOutModsVOPC() const {
831 return HasSDWAOutModsVOPC;
832 }
833
834 bool hasDLInsts() const {
835 return HasDLInsts;
836 }
837
838 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
839
840 bool hasDot1Insts() const {
841 return HasDot1Insts;
842 }
843
844 bool hasDot2Insts() const {
845 return HasDot2Insts;
846 }
847
848 bool hasDot3Insts() const {
849 return HasDot3Insts;
850 }
851
852 bool hasDot4Insts() const {
853 return HasDot4Insts;
854 }
855
856 bool hasDot5Insts() const {
857 return HasDot5Insts;
858 }
859
860 bool hasDot6Insts() const {
861 return HasDot6Insts;
862 }
863
864 bool hasDot7Insts() const {
865 return HasDot7Insts;
866 }
867
868 bool hasDot8Insts() const {
869 return HasDot8Insts;
870 }
871
872 bool hasDot9Insts() const {
873 return HasDot9Insts;
874 }
875
876 bool hasDot10Insts() const {
877 return HasDot10Insts;
878 }
879
880 bool hasDot11Insts() const {
881 return HasDot11Insts;
882 }
883
884 bool hasDot12Insts() const {
885 return HasDot12Insts;
886 }
887
888 bool hasDot13Insts() const {
889 return HasDot13Insts;
890 }
891
892 bool hasMAIInsts() const {
893 return HasMAIInsts;
894 }
895
896 bool hasFP8Insts() const {
897 return HasFP8Insts;
898 }
899
901
902 bool hasCubeInsts() const { return HasCubeInsts; }
903
904 bool hasLerpInst() const { return HasLerpInst; }
905
906 bool hasSadInsts() const { return HasSadInsts; }
907
908 bool hasQsadInsts() const { return HasQsadInsts; }
909
910 bool hasCvtNormInsts() const { return HasCvtNormInsts; }
911
913
915
916 bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
917
918 bool hasPkFmacF16Inst() const {
919 return HasPkFmacF16Inst;
920 }
921
925
929
933
937
939
941
945
947
949
953
957
961
965
967
968 /// \return true if the target has flat, global, and buffer atomic fadd for
969 /// double.
973
974 /// \return true if the target's flat, global, and buffer atomic fadd for
975 /// float supports denormal handling.
979
980 /// \return true if atomic operations targeting fine-grained memory work
981 /// correctly at device scope, in allocations in host or peer PCIe device
982 /// memory.
986
987 /// \return true if HW emulates, via a CAS loop, system scope atomics that
988 /// are unsupported by PCIe.
992
994
998
999 bool hasNoSdstCMPX() const {
1000 return HasNoSdstCMPX;
1001 }
1002
1003 bool hasVscnt() const {
1004 return HasVscnt;
1005 }
1006
1007 bool hasGetWaveIdInst() const {
1008 return HasGetWaveIdInst;
1009 }
1010
1011 bool hasSMemTimeInst() const {
1012 return HasSMemTimeInst;
1013 }
1014
1017 }
1018
1022
1023 bool hasVOP3Literal() const {
1024 return HasVOP3Literal;
1025 }
1026
1027 bool hasNoDataDepHazard() const {
1028 return HasNoDataDepHazard;
1029 }
1030
1032 return getGeneration() < SEA_ISLANDS;
1033 }
1034
1035 bool hasInstPrefetch() const {
1036 return getGeneration() == GFX10 || getGeneration() == GFX11;
1037 }
1038
1039 bool hasPrefetch() const { return GFX12Insts; }
1040
1041 bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }
1042
1044
1045 bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
1046
1047 // Has s_cmpk_* instructions.
1048 bool hasSCmpK() const { return getGeneration() < GFX12; }
1049
1050 // Scratch is allocated in 256 dword per wave blocks for the entire
1051 // wavefront. When viewed from the perspective of an arbitrary workitem, this
1052 // is 4-byte aligned.
1053 //
1054 // Only 4-byte alignment is really needed to access anything. Transformations
1055 // on the pointer value itself may rely on the alignment / known low bits of
1056 // the pointer. Set this to something above the minimum to avoid needing
1057 // dynamic realignment in common cases.
1058 Align getStackAlignment() const { return Align(16); }
1059
1060 bool enableMachineScheduler() const override {
1061 return true;
1062 }
1063
1064 bool useAA() const override;
1065
1066 bool enableSubRegLiveness() const override {
1067 return true;
1068 }
1069
1072
1073 // static wrappers
1074 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
1075
1076 // XXX - Why is this here if it isn't in the default pass set?
1077 bool enableEarlyIfConversion() const override {
1078 return true;
1079 }
1080
1082 const SchedRegion &Region) const override;
1083
1085 const SchedRegion &Region) const override;
1086
1087 void mirFileLoaded(MachineFunction &MF) const override;
1088
1089 unsigned getMaxNumUserSGPRs() const {
1090 return AMDGPU::getMaxNumUserSGPRs(*this);
1091 }
1092
1093 bool hasSMemRealTime() const {
1094 return HasSMemRealTime;
1095 }
1096
1097 bool hasMovrel() const {
1098 return HasMovrel;
1099 }
1100
1101 bool hasVGPRIndexMode() const {
1102 return HasVGPRIndexMode;
1103 }
1104
1105 bool useVGPRIndexMode() const;
1106
1108 return getGeneration() >= VOLCANIC_ISLANDS;
1109 }
1110
1112
1113 bool hasScalarStores() const {
1114 return HasScalarStores;
1115 }
1116
1117 bool hasScalarAtomics() const {
1118 return HasScalarAtomics;
1119 }
1120
1121 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1123
1124 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1125 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1126
1127 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1128 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1129
1130 bool hasDPP() const {
1131 return HasDPP;
1132 }
1133
1134 bool hasDPPBroadcasts() const {
1135 return HasDPP && getGeneration() < GFX10;
1136 }
1137
1139 return HasDPP && getGeneration() < GFX10;
1140 }
1141
1142 bool hasDPP8() const {
1143 return HasDPP8;
1144 }
1145
1146 bool hasDPALU_DPP() const {
1147 return HasDPALU_DPP;
1148 }
1149
1150 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1151
1152 bool hasPackedFP32Ops() const {
1153 return HasPackedFP32Ops;
1154 }
1155
1156 // Has V_PK_MOV_B32 opcode
1157 bool hasPkMovB32() const {
1158 return GFX90AInsts;
1159 }
1160
1162 return getGeneration() >= GFX10 || hasGFX940Insts();
1163 }
1164
1165 bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
1166
1167 bool hasImageInsts() const {
1168 return HasImageInsts;
1169 }
1170
1172 return HasExtendedImageInsts;
1173 }
1174
1175 bool hasR128A16() const {
1176 return HasR128A16;
1177 }
1178
1179 bool hasA16() const { return HasA16; }
1180
1181 bool hasG16() const { return HasG16; }
1182
1183 bool hasOffset3fBug() const {
1184 return HasOffset3fBug;
1185 }
1186
1188
1190
1191 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1192
1194
1196
1197 bool hasNSAEncoding() const { return HasNSAEncoding; }
1198
1199 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1200
1202
1203 unsigned getNSAMaxSize(bool HasSampler = false) const {
1204 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1205 }
1206
1207 bool hasGFX10_AEncoding() const {
1208 return GFX10_AEncoding;
1209 }
1210
1211 bool hasGFX10_BEncoding() const {
1212 return GFX10_BEncoding;
1213 }
1214
1215 bool hasGFX10_3Insts() const {
1216 return GFX10_3Insts;
1217 }
1218
1219 bool hasMadF16() const;
1220
1221 bool hasMovB64() const { return GFX940Insts || GFX1250Insts; }
1222
1223 bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }
1224
1225 // Scalar and global loads support scale_offset bit.
1226 bool hasScaleOffset() const { return GFX1250Insts; }
1227
1228 bool hasFlatGVSMode() const { return FlatGVSMode; }
1229
1230 // FLAT GLOBAL VOffset is signed
1231 bool hasSignedGVSOffset() const { return GFX1250Insts; }
1232
1233 bool enableSIScheduler() const {
1234 return EnableSIScheduler;
1235 }
1236
1237 bool loadStoreOptEnabled() const {
1238 return EnableLoadStoreOpt;
1239 }
1240
1241 bool hasSGPRInitBug() const {
1242 return SGPRInitBug;
1243 }
1244
1246 return UserSGPRInit16Bug && isWave32();
1247 }
1248
1250
1254
1257 }
1258
1262
1263 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1265 return CIInsts;
1266 }
1267
1270 }
1271
1276
1279 }
1280
1283 }
1284
1287 }
1288
1291 }
1292
1295 }
1296
1297 bool hasLDSMisalignedBug() const {
1298 return LDSMisalignedBug && !EnableCuMode;
1299 }
1300
1302 return HasInstFwdPrefetchBug;
1303 }
1304
1306 return HasVcmpxExecWARHazard;
1307 }
1308
1311 }
1312
1313 // Shift amount of a 64 bit shift cannot be the highest allocated register
1314 // if also at the end of the allocation block.
1316 return GFX90AInsts && !GFX940Insts;
1317 }
1318
1319 // Has one cycle hazard on transcendental instruction feeding a
1320 // non transcendental VALU.
1321 bool hasTransForwardingHazard() const { return GFX940Insts; }
1322
1323 // Has one cycle hazard on a VALU instruction partially writing dst with
1324 // a shift of result bits feeding another VALU instruction.
1326
1327 // Cannot use op_sel with v_dot instructions.
1328 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1329
1330 // Does not have HW interlocks for VALU writing and then reading SGPRs.
1331 bool hasVDecCoExecHazard() const {
1332 return GFX940Insts;
1333 }
1334
1335 bool hasNSAtoVMEMBug() const {
1336 return HasNSAtoVMEMBug;
1337 }
1338
1339 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1340
1341 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1342
1343 bool hasGFX90AInsts() const { return GFX90AInsts; }
1344
1346 return getGeneration() == GFX10;
1347 }
1348
1349 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1350
1351 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1352
1353 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1354
1356 return getGeneration() == GFX11;
1357 }
1358
1360
1362
1363 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1364
1366
1370
1371 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1372
1373 bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
1374
1376 return GFX1250Insts && getGeneration() == GFX12;
1377 }
1378
1379 /// Return if operations acting on VGPR tuples require even alignment.
1380 bool needsAlignedVGPRs() const { return RequiresAlignVGPR; }
1381
1382 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1383 bool hasSPackHL() const { return GFX11Insts; }
1384
1385 /// Return true if the target's EXP instruction has the COMPR flag, which
1386 /// affects the meaning of the EN (enable) bits.
1387 bool hasCompressedExport() const { return !GFX11Insts; }
1388
1389 /// Return true if the target's EXP instruction supports the NULL export
1390 /// target.
1391 bool hasNullExportTarget() const { return !GFX11Insts; }
1392
1393 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1394
1395 bool hasVOPDInsts() const { return HasVOPDInsts; }
1396
1398
1399 /// Return true if the target has the S_DELAY_ALU instruction.
1400 bool hasDelayAlu() const { return GFX11Insts; }
1401
1402 bool hasPackedTID() const { return HasPackedTID; }
1403
1404 // GFX94* is derived from GFX90A. hasGFX940Insts() being true implies that
1405 // hasGFX90AInsts is also true.
1406 bool hasGFX940Insts() const { return GFX940Insts; }
1407
1408 // GFX950 is derived from GFX94*. hasGFX950Insts() implies that
1409 // hasGFX940Insts and hasGFX90AInsts are also true.
1410 bool hasGFX950Insts() const { return GFX950Insts; }
1411
1412 /// Returns true if the target supports
1413 /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
1414 /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
1415 bool hasLDSLoadB96_B128() const {
1416 return hasGFX950Insts();
1417 }
1418
1419 bool hasVMemToLDSLoad() const { return HasVMemToLDSLoad; }
1420
1421 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1422
1424
1426
1428
1430
1431 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1432 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1433 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1434
1435 /// \returns true if inline constants are not supported for F16 pseudo
1436 /// scalar transcendentals.
1438 return getGeneration() == GFX12;
1439 }
1440
1441 /// \returns true if the target has instructions with xf32 format support.
1442 bool hasXF32Insts() const { return HasXF32Insts; }
1443
1444 /// \returns true if the target has packed f32 instructions that only read 32
1445 /// bits from a scalar operand (SGPR or literal) and replicates the bits to
1446 /// both channels.
1450
1451 bool hasBitOp3Insts() const { return HasBitOp3Insts; }
1452
1453 bool hasPermlane16Swap() const { return HasPermlane16Swap; }
1454 bool hasPermlane32Swap() const { return HasPermlane32Swap; }
1455 bool hasAshrPkInsts() const { return HasAshrPkInsts; }
1456
1459 }
1460
1463 }
1464
1465 bool hasMin3Max3PKF16() const { return HasMin3Max3PKF16; }
1466
1467 bool hasTanhInsts() const { return HasTanhInsts; }
1468
1470
1471 bool hasAddPC64Inst() const { return GFX1250Insts; }
1472
1474
1477 }
1478
1480
1481 /// \returns true if the target has s_wait_xcnt insertion. Supported for
1482 /// GFX1250.
1483 bool hasWaitXCnt() const { return HasWaitXcnt; }
1484
1485 // A single-DWORD instruction can use a 64-bit literal.
1486 bool has64BitLiterals() const { return Has64BitLiterals; }
1487
1489
1491
1492 /// \returns The maximum number of instructions that can be enclosed in an
1493 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1494 /// instruction.
1495 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1496
1497 bool hasPrngInst() const { return HasPrngInst; }
1498
1500
1501 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1502 /// SGPRs
1503 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1504
1505 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1506 /// VGPRs
1507 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,
1508 unsigned DynamicVGPRBlockSize) const;
1509
1510 /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
1511 /// be achieved when the only function running on a CU is \p F, each workgroup
1512 /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
1513 /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a
1514 /// range, so this returns a range as well.
1515 ///
1516 /// Note that occupancy can be affected by the scratch allocation as well, but
1517 /// we do not have enough information to compute it.
1518 std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
1519 unsigned LDSSize = 0,
1520 unsigned NumSGPRs = 0,
1521 unsigned NumVGPRs = 0) const;
1522
1523 /// \returns true if the flat_scratch register should be initialized with the
1524 /// pointer to the wave's scratch memory rather than a size and offset.
1527 }
1528
1529 /// \returns true if the flat_scratch register is initialized by the HW.
1530 /// In this case it is readonly.
1532
1533 /// \returns true if the architected SGPRs are enabled.
1535
1536 /// \returns true if Global Data Share is supported.
1537 bool hasGDS() const { return HasGDS; }
1538
1539 /// \returns true if Global Wave Sync is supported.
1540 bool hasGWS() const { return HasGWS; }
1541
1542 /// \returns true if the machine has merged shaders in which s0-s7 are
1543 /// reserved by the hardware and user SGPRs start at s8
1544 bool hasMergedShaders() const {
1545 return getGeneration() >= GFX9;
1546 }
1547
1548 /// \returns true if the target supports the pre-NGG legacy geometry path.
1549 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1550
1551 /// \returns true if preloading kernel arguments is supported.
1552 bool hasKernargPreload() const { return KernargPreload; }
1553
1554 /// \returns true if the target has the split barriers feature.
1555 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1556
1557 /// \returns true if the FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1558 bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
1559
1560 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1561 // no-return form.
1563
1564 /// \returns true if the target has the DX10_CLAMP kernel descriptor mode bit.
1565 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1566
1567 /// \returns true if the target has the IEEE kernel descriptor mode bit.
1568 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1569
1570 // \returns true if the target has IEEE fminimum/fmaximum instructions
1572
1573 /// \returns true if the target has the WG_RR_MODE kernel descriptor mode bit.
1574 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1575
1576 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1577 /// values.
1578 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1579
1580 bool hasGFX1250Insts() const { return GFX1250Insts; }
1581
1582 bool hasVOPD3() const { return GFX1250Insts; }
1583
1584 /// \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.
1585 bool hasAddSubU64Insts() const { return HasAddSubU64Insts; }
1586
1587 /// \returns true if the target has the V_MAD_U32 instruction.
1588 bool hasMadU32Inst() const { return HasMadU32Inst; }
1589
1590 /// \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
1591 bool hasVectorMulU64() const { return GFX1250Insts; }
1592
1593 /// \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32
1594 /// instructions.
1595 bool hasMadU64U32NoCarry() const { return GFX1250Insts; }
1596
1597 /// \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
1598 bool hasIntMinMax64() const { return GFX1250Insts; }
1599
1600 /// \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
1601 bool hasAddMinMaxInsts() const { return HasAddMinMaxInsts; }
1602
1603 // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
1605
1606 /// \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
1607 bool hasPkMinMax3Insts() const { return GFX1250Insts; }
1608
1609 /// \returns true if the target has the S_GET_SHADER_CYCLES_U64 instruction.
1610 bool hasSGetShaderCyclesInst() const { return GFX1250Insts; }
1611
1612 // \returns true if target has S_SETPRIO_INC_WG instruction.
1614
1615 /// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1616 /// of sign-extending. Note that GFX1250 has not only fixed the bug but also
1617 /// extended VA to 57 bits.
1618 bool hasGetPCZeroExtension() const { return GFX12Insts && !GFX1250Insts; }
1619
1620 // \returns true if the target needs to create a prolog for backward
1621 // compatibility when preloading kernel arguments.
1623 return hasKernargPreload() && !GFX1250Insts;
1624 }
1625
1626 /// \returns SGPR allocation granularity supported by the subtarget.
1627 unsigned getSGPRAllocGranule() const {
1629 }
1630
1631 /// \returns SGPR encoding granularity supported by the subtarget.
1632 unsigned getSGPREncodingGranule() const {
1634 }
1635
1636 /// \returns Total number of SGPRs supported by the subtarget.
1637 unsigned getTotalNumSGPRs() const {
1639 }
1640
1641 /// \returns Addressable number of SGPRs supported by the subtarget.
1642 unsigned getAddressableNumSGPRs() const {
1644 }
1645
1646 /// \returns Minimum number of SGPRs that meets the given number of waves per
1647 /// execution unit requirement supported by the subtarget.
1648 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1649 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1650 }
1651
1652 /// \returns Maximum number of SGPRs that meets the given number of waves per
1653 /// execution unit requirement supported by the subtarget.
1654 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1655 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1656 }
1657
1658 /// \returns Reserved number of SGPRs. This is the common
1659 /// utility function called by the MachineFunction and
1660 /// Function variants of getReservedNumSGPRs.
1661 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1662 /// \returns Reserved number of SGPRs for given machine function \p MF.
1663 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1664
1665 /// \returns Reserved number of SGPRs for given function \p F.
1666 unsigned getReservedNumSGPRs(const Function &F) const;
1667
1668 /// \returns Maximum number of preloaded SGPRs for the subtarget.
1669 unsigned getMaxNumPreloadedSGPRs() const;
1670
1671 /// \returns max num SGPRs. This is the common utility
1672 /// function called by MachineFunction and Function
1673 /// variants of getMaxNumSGPRs.
1674 unsigned getBaseMaxNumSGPRs(const Function &F,
1675 std::pair<unsigned, unsigned> WavesPerEU,
1676 unsigned PreloadedSGPRs,
1677 unsigned ReservedNumSGPRs) const;
1678
1679 /// \returns Maximum number of SGPRs that meets number of waves per execution
1680 /// unit requirement for function \p MF, or number of SGPRs explicitly
1681 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1682 ///
1683 /// \returns Value that meets number of waves per execution unit requirement
1684 /// if explicitly requested value cannot be converted to integer, violates
1685 /// subtarget's specifications, or does not meet number of waves per execution
1686 /// unit requirement.
1687 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1688
1689 /// \returns Maximum number of SGPRs that meets number of waves per execution
1690 /// unit requirement for function \p F, or number of SGPRs explicitly
1691 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1692 ///
1693 /// \returns Value that meets number of waves per execution unit requirement
1694 /// if explicitly requested value cannot be converted to integer, violates
1695 /// subtarget's specifications, or does not meet number of waves per execution
1696 /// unit requirement.
1697 unsigned getMaxNumSGPRs(const Function &F) const;
1698
1699 /// \returns VGPR allocation granularity supported by the subtarget.
1700 unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {
1701 return AMDGPU::IsaInfo::getVGPRAllocGranule(this, DynamicVGPRBlockSize);
1702 }
1703
1704 /// \returns VGPR encoding granularity supported by the subtarget.
1705 unsigned getVGPREncodingGranule() const {
1707 }
1708
1709 /// \returns Total number of VGPRs supported by the subtarget.
1710 unsigned getTotalNumVGPRs() const {
1712 }
1713
1714 /// \returns Addressable number of architectural VGPRs supported by the
1715 /// subtarget.
1719
1720 /// \returns Addressable number of VGPRs supported by the subtarget.
1721 unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
1722 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
1723 }
1724
1725 /// \returns the minimum number of VGPRs that will prevent achieving more than
1726 /// the specified number of waves \p WavesPerEU.
1727 unsigned getMinNumVGPRs(unsigned WavesPerEU,
1728 unsigned DynamicVGPRBlockSize) const {
1729 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU,
1730 DynamicVGPRBlockSize);
1731 }
1732
1733 /// \returns the maximum number of VGPRs that can be used and still achieve
1734 /// at least the specified number of waves \p WavesPerEU.
1735 unsigned getMaxNumVGPRs(unsigned WavesPerEU,
1736 unsigned DynamicVGPRBlockSize) const {
1737 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU,
1738 DynamicVGPRBlockSize);
1739 }
1740
1741 /// \returns max num VGPRs. This is the common utility function
1742 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1743 unsigned
1745 std::pair<unsigned, unsigned> NumVGPRBounds) const;
1746
1747 /// \returns Maximum number of VGPRs that meets number of waves per execution
1748 /// unit requirement for function \p F, or number of VGPRs explicitly
1749 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1750 ///
1751 /// \returns Value that meets number of waves per execution unit requirement
1752 /// if explicitly requested value cannot be converted to integer, violates
1753 /// subtarget's specifications, or does not meet number of waves per execution
1754 /// unit requirement.
1755 unsigned getMaxNumVGPRs(const Function &F) const;
1756
1757 unsigned getMaxNumAGPRs(const Function &F) const {
1758 return getMaxNumVGPRs(F);
1759 }
1760
1761 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
1762 /// of waves per execution unit required for the function \p F.
1763 std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
1764
1765 /// \returns Maximum number of VGPRs that meets number of waves per execution
1766 /// unit requirement for function \p MF, or number of VGPRs explicitly
1767 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1768 ///
1769 /// \returns Value that meets number of waves per execution unit requirement
1770 /// if explicitly requested value cannot be converted to integer, violates
1771 /// subtarget's specifications, or does not meet number of waves per execution
1772 /// unit requirement.
1773 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1774
1775 bool supportsWave32() const { return getGeneration() >= GFX10; }
1776
1777 bool supportsWave64() const { return !hasGFX1250Insts(); }
1778
1779 bool isWave32() const {
1780 return getWavefrontSize() == 32;
1781 }
1782
1783 bool isWave64() const {
1784 return getWavefrontSize() == 64;
1785 }
1786
1787 /// \returns true if the wave size of this subtarget is reliably known, i.e.
1788 /// one of the wavefrontsize32/wavefrontsize64 features is set. This is false
1789 /// only for a default target-cpu without an explicit +wavefrontsize feature.
1790 bool isWaveSizeKnown() const {
1791 return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
1792 hasFeature(AMDGPU::FeatureWavefrontSize64);
1793 }
1794
1796 return getRegisterInfo()->getBoolRC();
1797 }
1798
1799 /// \returns Maximum number of work groups per compute unit supported by the
1800 /// subtarget and limited by given \p FlatWorkGroupSize.
1801 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1802 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1803 }
1804
1805 /// \returns Minimum flat work group size supported by the subtarget.
1806 unsigned getMinFlatWorkGroupSize() const override {
1808 }
1809
1810 /// \returns Maximum flat work group size supported by the subtarget.
1811 unsigned getMaxFlatWorkGroupSize() const override {
1813 }
1814
1815 /// \returns Number of waves per execution unit required to support the given
1816 /// \p FlatWorkGroupSize.
1817 unsigned
1818 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1819 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1820 }
1821
1822 /// \returns Minimum number of waves per execution unit supported by the
1823 /// subtarget.
1824 unsigned getMinWavesPerEU() const override {
1826 }
1827
1828 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1829 SDep &Dep,
1830 const TargetSchedModel *SchedModel) const override;
1831
1832 /// \returns true if it's beneficial on this subtarget for the scheduler to
1833 /// cluster stores as well as loads.
1834 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1835
1836 /// \returns the number of address arguments from which to enable MIMG NSA
1837 /// on supported architectures.
1838 unsigned getNSAThreshold(const MachineFunction &MF) const;
1839
1840 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1841 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1843
1844 // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
1845 // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
1847
1848 bool isDynamicVGPREnabled() const { return DynamicVGPR; }
1849 unsigned getDynamicVGPRBlockSize() const {
1850 return DynamicVGPRBlockSize32 ? 32 : 16;
1851 }
1852
1854 // AMDGPU doesn't care if early-clobber and undef operands are allocated
1855 // to the same register.
1856 return false;
1857 }
1858
1859 // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
1860 // and shall be surrounded by S_WAIT_ALU(0xFFE3).
1862 return getGeneration() == GFX12;
1863 }
1864
1865 // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
1866 // read.
1868 return GFX1250Insts && getGeneration() == GFX12;
1869 }
1870
1871 /// \returns true if the subtarget supports clusters of workgroups.
1872 bool hasClusters() const { return HasClusters; }
1873
1874 /// \returns true if the subtarget requires a wait for xcnt before atomic
1875 /// flat/global stores & rmw.
1877
1878 /// \returns the number of significant bits in the immediate field of the
1879 /// S_NOP instruction.
1880 unsigned getSNopBits() const {
1882 return 7;
1884 return 4;
1885 return 3;
1886 }
1887
1888 /// \returns true if the sub-target supports buffer resource (V#) with 45-bit
1889 /// num_records.
1893
1897};
1898
1900public:
1901 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1902
1903 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1904
1905 bool hasDispatchPtr() const { return DispatchPtr; }
1906
1907 bool hasQueuePtr() const { return QueuePtr; }
1908
1909 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1910
1911 bool hasDispatchID() const { return DispatchID; }
1912
1913 bool hasFlatScratchInit() const { return FlatScratchInit; }
1914
1915 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1916
1917 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1918
1919 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1920
1921 unsigned getNumFreeUserSGPRs();
1922
1923 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1924
1935
1936 // Returns the size in number of SGPRs for preload user SGPR field.
1938 switch (ID) {
1940 return 2;
1942 return 4;
1943 case DispatchPtrID:
1944 return 2;
1945 case QueuePtrID:
1946 return 2;
1948 return 2;
1949 case DispatchIdID:
1950 return 2;
1951 case FlatScratchInitID:
1952 return 2;
1954 return 1;
1955 }
1956 llvm_unreachable("Unknown UserSGPRID.");
1957 }
1958
1959 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1960
1961private:
1962 const GCNSubtarget &ST;
1963
1964 // Private memory buffer
1965 // Compute directly in sgpr[0:1]
1966 // Other shaders indirect 64-bits at sgpr[0:1]
1967 bool ImplicitBufferPtr = false;
1968
1969 bool PrivateSegmentBuffer = false;
1970
1971 bool DispatchPtr = false;
1972
1973 bool QueuePtr = false;
1974
1975 bool KernargSegmentPtr = false;
1976
1977 bool DispatchID = false;
1978
1979 bool FlatScratchInit = false;
1980
1981 bool PrivateSegmentSize = false;
1982
1983 unsigned NumKernargPreloadSGPRs = 0;
1984
1985 unsigned NumUsedUserSGPRs = 0;
1986};
1987
1988} // end namespace llvm
1989
1990#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
#define F(x, y, z)
Definition MD5.cpp:54
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
bool hasMemoryAtomicFaddF32DenormalSupport() const
bool hasFlat() const
bool hasD16Images() const
bool hasMinimum3Maximum3F32() const
InstrItineraryData InstrItins
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
bool hasSDWAOmod() const
bool hasFlatGVSMode() const
bool hasPermlane32Swap() const
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
bool hasSwap() const
bool hasPkFmacF16Inst() const
bool HasAtomicFMinFMaxF64FlatInsts
bool hasPkMinMax3Insts() const
bool hasDot2Insts() const
bool hasD16LoadStore() const
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
bool hasScalarCompareEq64() const
bool has1_5xVGPRs() const
int getLDSBankCount() const
bool hasSafeCUPrefetch() const
bool hasOnlyRevVALUShifts() const
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
bool hasLdsAtomicAddF64() const
bool hasFlatLgkmVMemCountInOrder() const
bool Has45BitNumRecordsBufferResource
bool flatScratchIsPointer() const
bool hasSDWAMac() const
bool hasFP8ConversionInsts() const
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
bool hasApertureRegs() const
unsigned MaxPrivateElementSize
bool unsafeDSOffsetFoldingEnabled() const
bool hasBitOp3Insts() const
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
bool vmemWriteNeedsExpWaitcnt() const
bool hasAtomicFMinFMaxF32FlatInsts() const
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
bool hasIEEEMinimumMaximumInsts() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
bool hasGetWaveIdInst() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
bool hasRelaxedBufferOOBMode() const
bool hasPkAddMinMaxInsts() const
bool hasDLInsts() const
bool hasExtendedImageInsts() const
bool hasVmemWriteVgprInOrder() const
bool hasBCNT(unsigned Size) const
unsigned getSNopBits() const
bool hasMAIInsts() const
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
bool has1024AddressableVGPRs() const
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
bool hasFlatScratchInsts() const
bool hasMultiDwordFlatScratchAddressing() const
bool hasArchitectedSGPRs() const
bool hasFmaakFmamkF64Insts() const
bool hasTanhInsts() const
bool hasHWFP64() const
bool hasScaleOffset() const
bool hasDenormModeInst() const
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
bool hasCvtScaleForwardingHazard() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
bool hasUnalignedDSAccessEnabled() const
bool hasTensorCvtLutInsts() const
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasDot1Insts() const
bool hasDot3Insts() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
bool hasAutoWaitcntBeforeBarrier() const
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
unsigned getTotalNumSGPRs() const
bool hasGFX1250Insts() const
const InstrItineraryData * getInstrItineraryData() const override
bool hasSafeSmemPrefetch() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool HasShaderCyclesHiLoRegisters
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
bool privateMemoryResourceIsRangeChecked() const
bool hasScalarSubwordLoads() const
bool hasDot11Insts() const
bool enableFlatScratch() const
bool hasMadF16() const
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const
bool hasMin3Max3PKF16() const
bool hasUnalignedBufferAccess() const
bool hasR128A16() const
bool hasCvtPkNormVOP3Insts() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasPrngInst() const
bool hasSignedScratchOffsets() const
bool hasGlobalAddTidInsts() const
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
bool hasFmaMixBF16Insts() const
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool hasVMemToLDSLoad() const
bool supportsGetDoorbellID() const
bool supportsWave32() const
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
bool hasFlatAtomicFaddF32Inst() const
bool hasKernargPreload() const
bool hasFP8Insts() const
unsigned getMaxNumAGPRs(const Function &F) const
bool hasReadM0MovRelInterpHazard() const
bool isDynamicVGPREnabled() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
bool hasFmaakFmamkF32Insts() const
bool hasClusters() const
bool hasVscnt() const
bool hasMad64_32() const
InstructionSelector * getInstructionSelector() const override
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
bool hasHardClauses() const
bool useDS128() const
bool hasExtendedWaitCounts() const
bool hasBVHDualAndBVH8Insts() const
bool hasMinimum3Maximum3PKF16() const
bool hasLshlAddU64Inst() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
bool hasFmacF64Inst() const
bool RequiresWaitsBeforeSystemScopeStores
bool hasXF32Insts() const
bool hasInstPrefetch() const
bool hasAddPC64Inst() const
unsigned maxHardClauseLength() const
bool hasAshrPkInsts() const
bool isMesaGfxShader(const Function &F) const
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
bool hasDPP() const
bool hasVINTERPEncoding() const
bool hasGloballyAddressableScratch() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasAddSubU64Insts() const
bool hasLegacyGeometry() const
bool has64BitLiterals() const
TrapHandlerAbi getTrapHandlerAbi() const
bool isCuModeEnabled() const
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
bool hasUnalignedScratchAccess() const
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasMinimum3Maximum3F16() const
bool hasSDWAOutModsVOPC() const
bool hasAtomicFMinFMaxF32GlobalInsts() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
bool hasLdsBarrierArriveAtomic() const
bool hasGFX950Insts() const
bool hasCvtNormInsts() const
bool has45BitNumRecordsBufferResource() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
unsigned getMaxNumPreloadedSGPRs() const
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
bool hasNoDataDepHazard() const
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool useVGPRBlockOpsForCSR() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool hasUnalignedDSAccess() const
bool hasAddMinMaxInsts() const
bool needsKernArgPreloadProlog() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
bool hasIntClamp() const
bool hasGFX10_AEncoding() const
bool hasFP8E5M3Insts() const
bool hasFlatSegmentOffsetBug() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const
bool hasEmulatedSystemScopeAtomics() const
bool hasMadU64U32NoCarry() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
const SITargetLowering * getTargetLowering() const override
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
bool hasGFX940Insts() const
bool hasFullRate64Ops() const
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
bool enableMachineScheduler() const override
bool hasLDSFPAtomicAddF64() const
bool hasFlatGlobalInsts() const
bool HasGloballyAddressableScratch
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
bool getScalarizeGlobalBehavior() const
bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const
bool HasAtomicFMinFMaxF32FlatInsts
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasScratchBaseForwardingHazard() const
bool hasLerpInst() const
bool hasIntMinMax64() const
bool hasShaderCyclesHiLoRegisters() const
bool hasSDWASdst() const
bool HasDefaultComponentBroadcast
bool hasScalarPackInsts() const
bool hasFFBL() const
bool hasNSAEncoding() const
bool requiresDisjointEarlyClobberAndUndef() const override
bool hasVALUReadSGPRHazard() const
bool hasSMemRealTime() const
bool hasFlatAddressSpace() const
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
bool hasMovB64() const
bool hasVmemPrefInsts() const
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const
bool hasCubeInsts() const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
bool hasMed3_16() const
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasUnalignedScratchAccessEnabled() const
bool hasMovrel() const
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
bool hasBFI() const
bool hasDot13Insts() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
bool HasAtomicBufferGlobalPkAddF16Insts
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
bool hasUnalignedBufferAccessEnabled() const
bool isWaveSizeKnown() const
Returns if the wavesize of this subtarget is known reliable.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool hasFMA() const
bool hasDot10Insts() const
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
bool supportsWave64() const
bool HasAtomicBufferPkAddBF16Inst
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
bool hasFormattedMUBUFInsts() const
bool hasFlatScratchSVSMode() const
bool supportsWGP() const
bool hasG16() const
bool hasHalfRate64Ops() const
bool hasAtomicFaddInsts() const
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
bool hasPermlane16Swap() const
bool hasNSAtoVMEMBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
bool hasSadInsts() const
bool hasMIMG_R128() const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
unsigned getMaxNumUserSGPRs() const
bool hasTransposeLoadF4F6Insts() const
bool hasMadU32Inst() const
bool hasAtomicFaddNoRtnInsts() const
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
bool HasEmulatedSystemScopeAtomics
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasSignedGVSOffset() const
bool hasCvtPkNormVOP2Insts() const
bool requiresWaitXCntBeforeAtomicStores() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
bool hasVectorMulU64() const
bool hasScalarMulHiInsts() const
bool hasSCmpK() const
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
bool requiresWaitIdleBeforeGetReg() const
bool hasPointSampleAccel() const
bool hasDot12Insts() const
bool hasDS96AndDS128() const
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Generation getGeneration() const
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
bool hasAtomicBufferGlobalPkAddF16Insts() const
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit required for the function F.
bool hasNoSdstCMPX() const
bool isXNACKEnabled() const
bool hasScalarAddSub64() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
bool enableEarlyIfConversion() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by an SMRD instruction requires 4 wait states when the SGPR was written by a VALU instruction.
bool hasSGetShaderCyclesInst() const
bool hasRFEHazards() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU instruction.
bool hasFlatScratchSTMode() const
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
bool hasAddr64() const
bool HasAtomicGlobalPkAddBF16Inst
bool hasUnalignedAccessMode() const
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool isWave64() const
unsigned getDynamicVGPRBlockSize() const
bool hasFmaMixInsts() const
bool hasCARRY() const
bool hasQsadInsts() const
bool hasPackedTID() const
bool setRegModeNeedsVNOPs() const
bool hasFP64() const
bool hasAddNoCarry() const
bool requiresWaitsBeforeSystemScopeStores() const
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
bool hasSALUFloatInsts() const
bool EnableUnsafeDSOffsetFolding
bool hasFractBug() const
bool isPreciseMemoryEnabled() const
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
bool HasMemoryAtomicFaddF32DenormalSupport
bool hasMTBUFInsts() const
bool hasDot4Insts() const
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
bool hasWaitXCnt() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
bool hasSetPrioIncWgInst() const
~GCNSubtarget() override
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasDot9Insts() const
bool hasVOPD3() const
bool hasAtomicCSub() const
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
bool hasDefaultComponentBroadcast() const
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
bool hasBFE() const
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
bool HasFlatBufferGlobalAtomicFaddF64Inst
static unsigned getNumUserSGPRForField(UserSGPRID ID)
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
unsigned getNumKernargPreloadSGPRs() const
unsigned getNumUsedUserSGPRs() const
GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST)
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition ScheduleDAG.h:51
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process.
StringRef - Represent a constant reference to a string, i.e., a character array and a length, which need not be null terminated.
Definition StringRef.h:55
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least, stopping at the first 1.
Definition bit.h:236
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
A region of an MBB for scheduling.