LLVM 19.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
26
27#define GET_SUBTARGETINFO_HEADER
28#include "AMDGPUGenSubtargetInfo.inc"
29
30namespace llvm {
31
32class GCNTargetMachine;
33
35 public AMDGPUSubtarget {
36public:
38
39 // Following 2 enums are documented at:
40 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI selector; enumerator values are fixed (NONE is 0,
/// AMDHSA is 1).
enum class TrapHandlerAbi {
  NONE = 0,
  AMDHSA = 1,
};
45
46 enum class TrapID {
47 LLVMAMDHSATrap = 0x02,
49 };
50
51private:
52 /// GlobalISel related APIs.
53 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
54 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
55 std::unique_ptr<InstructionSelector> InstSelector;
56 std::unique_ptr<LegalizerInfo> Legalizer;
57 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
58
59protected:
60 // Basic subtarget description.
63 unsigned Gen = INVALID;
65 int LDSBankCount = 0;
67
68 // Possibly statically set by tablegen, but may want to be overridden.
69 bool FastDenormalF32 = false;
70 bool HalfRate64Ops = false;
71 bool FullRate64Ops = false;
72
73 // Dynamically set bits that enable features.
74 bool FlatForGlobal = false;
76 bool BackOffBarrier = false;
78 bool UnalignedAccessMode = false;
79 bool HasApertureRegs = false;
80 bool SupportsXNACK = false;
81 bool KernargPreload = false;
82
83 // This should not be used directly. 'TargetID' tracks the dynamic settings
84 // for XNACK.
85 bool EnableXNACK = false;
86
87 bool EnableTgSplit = false;
88 bool EnableCuMode = false;
89 bool TrapHandler = false;
90 bool EnablePreciseMemory = false;
91
92 // Used as options.
93 bool EnableLoadStoreOpt = false;
95 bool EnableSIScheduler = false;
96 bool EnableDS128 = false;
97 bool EnablePRTStrictNull = false;
98 bool DumpCode = false;
99
100 // Subtarget properties statically set by tablegen
101 bool FP64 = false;
102 bool FMA = false;
103 bool MIMG_R128 = false;
104 bool CIInsts = false;
105 bool GFX8Insts = false;
106 bool GFX9Insts = false;
107 bool GFX90AInsts = false;
108 bool GFX940Insts = false;
109 bool GFX10Insts = false;
110 bool GFX11Insts = false;
111 bool GFX12Insts = false;
112 bool GFX10_3Insts = false;
113 bool GFX7GFX8GFX9Insts = false;
114 bool SGPRInitBug = false;
115 bool UserSGPRInit16Bug = false;
118 bool HasSMemRealTime = false;
119 bool HasIntClamp = false;
120 bool HasFmaMixInsts = false;
121 bool HasMovrel = false;
122 bool HasVGPRIndexMode = false;
124 bool HasScalarStores = false;
125 bool HasScalarAtomics = false;
126 bool HasSDWAOmod = false;
127 bool HasSDWAScalar = false;
128 bool HasSDWASdst = false;
129 bool HasSDWAMac = false;
130 bool HasSDWAOutModsVOPC = false;
131 bool HasDPP = false;
132 bool HasDPP8 = false;
133 bool HasDPALU_DPP = false;
134 bool HasDPPSrc1SGPR = false;
135 bool HasPackedFP32Ops = false;
136 bool HasImageInsts = false;
138 bool HasR128A16 = false;
139 bool HasA16 = false;
140 bool HasG16 = false;
141 bool HasNSAEncoding = false;
143 bool GFX10_AEncoding = false;
144 bool GFX10_BEncoding = false;
145 bool HasDLInsts = false;
146 bool HasFmacF64Inst = false;
147 bool HasDot1Insts = false;
148 bool HasDot2Insts = false;
149 bool HasDot3Insts = false;
150 bool HasDot4Insts = false;
151 bool HasDot5Insts = false;
152 bool HasDot6Insts = false;
153 bool HasDot7Insts = false;
154 bool HasDot8Insts = false;
155 bool HasDot9Insts = false;
156 bool HasDot10Insts = false;
157 bool HasDot11Insts = false;
158 bool HasMAIInsts = false;
159 bool HasFP8Insts = false;
161 bool HasPkFmacF16Inst = false;
181 /// The maximum number of instructions that may be placed within an S_CLAUSE,
182 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
183 /// indicates a lack of S_CLAUSE support.
185 bool SupportsSRAMECC = false;
186
187 // This should not be used directly. 'TargetID' tracks the dynamic settings
188 // for SRAMECC.
189 bool EnableSRAMECC = false;
190
191 bool HasNoSdstCMPX = false;
192 bool HasVscnt = false;
193 bool HasGetWaveIdInst = false;
194 bool HasSMemTimeInst = false;
197 bool HasVOP3Literal = false;
198 bool HasNoDataDepHazard = false;
199 bool FlatAddressSpace = false;
200 bool FlatInstOffsets = false;
201 bool FlatGlobalInsts = false;
202 bool FlatScratchInsts = false;
205 bool EnableFlatScratch = false;
207 bool HasGDS = false;
208 bool HasGWS = false;
209 bool AddNoCarryInsts = false;
210 bool HasUnpackedD16VMem = false;
211 bool LDSMisalignedBug = false;
214 bool UnalignedDSAccess = false;
215 bool HasPackedTID = false;
216 bool ScalarizeGlobal = false;
217 bool HasSALUFloatInsts = false;
221
228 bool HasNSAtoVMEMBug = false;
229 bool HasNSAClauseBug = false;
230 bool HasOffset3fBug = false;
236 bool Has1_5xVGPRs = false;
237 bool HasMADIntraFwdBug = false;
238 bool HasVOPDInsts = false;
241
242 bool RequiresCOV6 = false;
243
244 // Dummy feature to use for assembler in tablegen.
245 bool FeatureDisable = false;
246
248private:
249 SIInstrInfo InstrInfo;
250 SITargetLowering TLInfo;
251 SIFrameLowering FrameLowering;
252
253public:
254 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
255 const GCNTargetMachine &TM);
256 ~GCNSubtarget() override;
257
259 StringRef GPU, StringRef FS);
260
261 /// Diagnose inconsistent subtarget features before attempting to codegen
262 /// function \p F.
263 void checkSubtargetFeatures(const Function &F) const;
264
265 const SIInstrInfo *getInstrInfo() const override {
266 return &InstrInfo;
267 }
268
269 const SIFrameLowering *getFrameLowering() const override {
270 return &FrameLowering;
271 }
272
273 const SITargetLowering *getTargetLowering() const override {
274 return &TLInfo;
275 }
276
277 const SIRegisterInfo *getRegisterInfo() const override {
278 return &InstrInfo.getRegisterInfo();
279 }
280
281 const CallLowering *getCallLowering() const override {
282 return CallLoweringInfo.get();
283 }
284
285 const InlineAsmLowering *getInlineAsmLowering() const override {
286 return InlineAsmLoweringInfo.get();
287 }
288
290 return InstSelector.get();
291 }
292
293 const LegalizerInfo *getLegalizerInfo() const override {
294 return Legalizer.get();
295 }
296
297 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
298 return RegBankInfo.get();
299 }
300
302 return TargetID;
303 }
304
305 // Nothing implemented, just prevent crashes on use.
307 return &TSInfo;
308 }
309
311 return &InstrItins;
312 }
313
315
317 return (Generation)Gen;
318 }
319
320 unsigned getMaxWaveScratchSize() const {
321 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
322 if (getGeneration() >= GFX12) {
323 // 18-bit field in units of 64-dword.
324 return (64 * 4) * ((1 << 18) - 1);
325 }
326 if (getGeneration() == GFX11) {
327 // 15-bit field in units of 64-dword.
328 return (64 * 4) * ((1 << 15) - 1);
329 }
330 // 13-bit field in units of 256-dword.
331 return (256 * 4) * ((1 << 13) - 1);
332 }
333
334 /// Return the number of high bits known to be zero for a frame index.
337 }
338
339 int getLDSBankCount() const {
340 return LDSBankCount;
341 }
342
343 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
344 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
345 }
346
347 unsigned getConstantBusLimit(unsigned Opcode) const;
348
349 /// \returns true if an instruction with opcode \p Opcode, which produces a
350 /// 16-bit result in a 32-bit register, implicitly zeroes the high 16 bits
351 /// rather than preserving their original value.
352 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
353
354 bool supportsWGP() const { return getGeneration() >= GFX10; }
355
356 bool hasIntClamp() const {
357 return HasIntClamp;
358 }
359
360 bool hasFP64() const {
361 return FP64;
362 }
363
364 bool hasMIMG_R128() const {
365 return MIMG_R128;
366 }
367
368 bool hasHWFP64() const {
369 return FP64;
370 }
371
372 bool hasHalfRate64Ops() const {
373 return HalfRate64Ops;
374 }
375
376 bool hasFullRate64Ops() const {
377 return FullRate64Ops;
378 }
379
380 bool hasAddr64() const {
382 }
383
384 bool hasFlat() const {
386 }
387
388 // Return true if the target only has the reverse operand versions of VALU
389 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
390 bool hasOnlyRevVALUShifts() const {
392 }
393
394 bool hasFractBug() const {
396 }
397
398 bool hasBFE() const {
399 return true;
400 }
401
402 bool hasBFI() const {
403 return true;
404 }
405
406 bool hasBFM() const {
407 return hasBFE();
408 }
409
410 bool hasBCNT(unsigned Size) const {
411 return true;
412 }
413
414 bool hasFFBL() const {
415 return true;
416 }
417
418 bool hasFFBH() const {
419 return true;
420 }
421
422 bool hasMed3_16() const {
424 }
425
426 bool hasMin3Max3_16() const {
428 }
429
430 bool hasFmaMixInsts() const {
431 return HasFmaMixInsts;
432 }
433
434 bool hasCARRY() const {
435 return true;
436 }
437
438 bool hasFMA() const {
439 return FMA;
440 }
441
442 bool hasSwap() const {
443 return GFX9Insts;
444 }
445
446 bool hasScalarPackInsts() const {
447 return GFX9Insts;
448 }
449
450 bool hasScalarMulHiInsts() const {
451 return GFX9Insts;
452 }
453
454 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
455
458 }
459
461 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
462 return getGeneration() >= GFX9;
463 }
464
465 /// True if the offset field of DS instructions works as expected. On SI, the
466 /// offset uses a 16-bit adder and does not always wrap properly.
467 bool hasUsableDSOffset() const {
468 return getGeneration() >= SEA_ISLANDS;
469 }
470
473 }
474
475 /// Condition output from div_scale is usable.
478 }
479
480 /// Extra wait hazard is needed in some cases before
481 /// s_cbranch_vccnz/s_cbranch_vccz.
482 bool hasReadVCCZBug() const {
483 return getGeneration() <= SEA_ISLANDS;
484 }
485
486 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
488 return getGeneration() >= GFX10;
489 }
490
491 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
492 /// was written by a VALU instruction.
495 }
496
497 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
498 /// SGPR was written by a VALU Instruction.
501 }
502
503 bool hasRFEHazards() const {
505 }
506
507 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
508 unsigned getSetRegWaitStates() const {
509 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
510 }
511
512 bool dumpCode() const {
513 return DumpCode;
514 }
515
516 /// Return the amount of LDS that can be used that will not restrict the
517 /// occupancy lower than WaveCount.
518 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
519 const Function &) const;
520
523 }
524
525 /// \returns If target supports S_DENORM_MODE.
526 bool hasDenormModeInst() const {
528 }
529
530 bool useFlatForGlobal() const {
531 return FlatForGlobal;
532 }
533
534 /// \returns If target supports ds_read/write_b128 and user enables generation
535 /// of ds_read/write_b128.
536 bool useDS128() const {
537 return CIInsts && EnableDS128;
538 }
539
540 /// \return If target supports ds_read/write_b96/128.
541 bool hasDS96AndDS128() const {
542 return CIInsts;
543 }
544
545 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
546 bool haveRoundOpsF64() const {
547 return CIInsts;
548 }
549
550 /// \returns If MUBUF instructions always perform range checking, even for
551 /// buffer resources used for private memory access.
554 }
555
556 /// \returns true if the target requires PRT Strict NULL support (zero result
557 /// registers for sparse texture support).
558 bool usePRTStrictNull() const {
559 return EnablePRTStrictNull;
560 }
561
564 }
565
566 /// \returns true if the target supports backing off of s_barrier instructions
567 /// when an exception is raised.
569 return BackOffBarrier;
570 }
571
574 }
575
578 }
579
580 bool hasUnalignedDSAccess() const {
581 return UnalignedDSAccess;
582 }
583
586 }
587
590 }
591
593 return UnalignedAccessMode;
594 }
595
596 bool hasApertureRegs() const {
597 return HasApertureRegs;
598 }
599
600 bool isTrapHandlerEnabled() const {
601 return TrapHandler;
602 }
603
604 bool isXNACKEnabled() const {
605 return TargetID.isXnackOnOrAny();
606 }
607
608 bool isTgSplitEnabled() const {
609 return EnableTgSplit;
610 }
611
612 bool isCuModeEnabled() const {
613 return EnableCuMode;
614 }
615
617
618 bool hasFlatAddressSpace() const {
619 return FlatAddressSpace;
620 }
621
622 bool hasFlatScrRegister() const {
623 return hasFlatAddressSpace();
624 }
625
626 bool hasFlatInstOffsets() const {
627 return FlatInstOffsets;
628 }
629
630 bool hasFlatGlobalInsts() const {
631 return FlatGlobalInsts;
632 }
633
634 bool hasFlatScratchInsts() const {
635 return FlatScratchInsts;
636 }
637
638 // Check if target supports ST addressing mode with FLAT scratch instructions.
639 // The ST addressing mode means no registers are used, either VGPR or SGPR,
640 // but only immediate offset is swizzled and added to the FLAT scratch base.
641 bool hasFlatScratchSTMode() const {
643 }
644
645 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
646
649 }
650
651 bool enableFlatScratch() const {
652 return flatScratchIsArchitected() ||
654 }
655
656 bool hasGlobalAddTidInsts() const {
657 return GFX10_BEncoding;
658 }
659
660 bool hasAtomicCSub() const {
661 return GFX10_BEncoding;
662 }
663
664 bool hasExportInsts() const {
665 return !hasGFX940Insts();
666 }
667
668 bool hasVINTERPEncoding() const {
669 return GFX11Insts;
670 }
671
672 // DS_ADD_F64/DS_ADD_RTN_F64
673 bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); }
674
676 return getGeneration() >= GFX9;
677 }
678
681 }
682
684 return getGeneration() > GFX9;
685 }
686
687 bool hasD16LoadStore() const {
688 return getGeneration() >= GFX9;
689 }
690
693 }
694
695 bool hasD16Images() const {
697 }
698
699 /// Return if most LDS instructions have an m0 use that require m0 to be
700 /// initialized.
701 bool ldsRequiresM0Init() const {
702 return getGeneration() < GFX9;
703 }
704
705 // True if the hardware rewinds and replays GWS operations if a wave is
706 // preempted.
707 //
708 // If this is false, a GWS operation requires testing if a nack set the
709 // MEM_VIOL bit, and repeating if so.
710 bool hasGWSAutoReplay() const {
711 return getGeneration() >= GFX9;
712 }
713
714 /// \returns if target has ds_gws_sema_release_all instruction.
715 bool hasGWSSemaReleaseAll() const {
716 return CIInsts;
717 }
718
719 /// \returns true if the target has integer add/sub instructions that do not
720 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
721 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
722 /// for saturation.
723 bool hasAddNoCarry() const {
724 return AddNoCarryInsts;
725 }
726
727 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
728
729 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
730
731 bool hasUnpackedD16VMem() const {
732 return HasUnpackedD16VMem;
733 }
734
735 // Covers VS/PS/CS graphics shaders
736 bool isMesaGfxShader(const Function &F) const {
737 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
738 }
739
740 bool hasMad64_32() const {
741 return getGeneration() >= SEA_ISLANDS;
742 }
743
744 bool hasSDWAOmod() const {
745 return HasSDWAOmod;
746 }
747
748 bool hasSDWAScalar() const {
749 return HasSDWAScalar;
750 }
751
752 bool hasSDWASdst() const {
753 return HasSDWASdst;
754 }
755
756 bool hasSDWAMac() const {
757 return HasSDWAMac;
758 }
759
760 bool hasSDWAOutModsVOPC() const {
761 return HasSDWAOutModsVOPC;
762 }
763
764 bool hasDLInsts() const {
765 return HasDLInsts;
766 }
767
768 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
769
770 bool hasDot1Insts() const {
771 return HasDot1Insts;
772 }
773
774 bool hasDot2Insts() const {
775 return HasDot2Insts;
776 }
777
778 bool hasDot3Insts() const {
779 return HasDot3Insts;
780 }
781
782 bool hasDot4Insts() const {
783 return HasDot4Insts;
784 }
785
786 bool hasDot5Insts() const {
787 return HasDot5Insts;
788 }
789
790 bool hasDot6Insts() const {
791 return HasDot6Insts;
792 }
793
794 bool hasDot7Insts() const {
795 return HasDot7Insts;
796 }
797
798 bool hasDot8Insts() const {
799 return HasDot8Insts;
800 }
801
802 bool hasDot9Insts() const {
803 return HasDot9Insts;
804 }
805
806 bool hasDot10Insts() const {
807 return HasDot10Insts;
808 }
809
810 bool hasDot11Insts() const {
811 return HasDot11Insts;
812 }
813
814 bool hasMAIInsts() const {
815 return HasMAIInsts;
816 }
817
818 bool hasFP8Insts() const {
819 return HasFP8Insts;
820 }
821
823
824 bool hasPkFmacF16Inst() const {
825 return HasPkFmacF16Inst;
826 }
827
830 }
831
834 }
835
838 }
839
842 }
843
845
847
848 bool hasAtomicFaddInsts() const {
850 }
851
853
855
858 }
859
862 }
863
866 }
867
870 }
871
873
874 /// \return true if the target has flat, global, and buffer atomic fadd for
875 /// double.
878 }
879
880 /// \return true if the target's flat, global, and buffer atomic fadd for
881 /// float supports denormal handling.
884 }
885
886 /// \return true if atomic operations targeting fine-grained memory work
887 /// correctly at device scope, in allocations in host or peer PCIe device
888 /// memory.
891 }
892
894
897 }
898
899 bool hasNoSdstCMPX() const {
900 return HasNoSdstCMPX;
901 }
902
903 bool hasVscnt() const {
904 return HasVscnt;
905 }
906
907 bool hasGetWaveIdInst() const {
908 return HasGetWaveIdInst;
909 }
910
911 bool hasSMemTimeInst() const {
912 return HasSMemTimeInst;
913 }
914
917 }
918
921 }
922
923 bool hasVOP3Literal() const {
924 return HasVOP3Literal;
925 }
926
927 bool hasNoDataDepHazard() const {
928 return HasNoDataDepHazard;
929 }
930
932 return getGeneration() < SEA_ISLANDS;
933 }
934
935 bool hasInstPrefetch() const {
936 return getGeneration() == GFX10 || getGeneration() == GFX11;
937 }
938
939 bool hasPrefetch() const { return GFX12Insts; }
940
941 // Has s_cmpk_* instructions.
942 bool hasSCmpK() const { return getGeneration() < GFX12; }
943
944 // Scratch is allocated in 256 dword per wave blocks for the entire
945 // wavefront. When viewed from the perspective of an arbitrary workitem, this
946 // is 4-byte aligned.
947 //
948 // Only 4-byte alignment is really needed to access anything. Transformations
949 // on the pointer value itself may rely on the alignment / known low bits of
950 // the pointer. Set this to something above the minimum to avoid needing
951 // dynamic realignment in common cases.
952 Align getStackAlignment() const { return Align(16); }
953
954 bool enableMachineScheduler() const override {
955 return true;
956 }
957
958 bool useAA() const override;
959
960 bool enableSubRegLiveness() const override {
961 return true;
962 }
963
966
967 // static wrappers
968 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
969
970 // XXX - Why is this here if it isn't in the default pass set?
971 bool enableEarlyIfConversion() const override {
972 return true;
973 }
974
976 unsigned NumRegionInstrs) const override;
977
978 void mirFileLoaded(MachineFunction &MF) const override;
979
980 unsigned getMaxNumUserSGPRs() const {
981 return AMDGPU::getMaxNumUserSGPRs(*this);
982 }
983
984 bool hasSMemRealTime() const {
985 return HasSMemRealTime;
986 }
987
988 bool hasMovrel() const {
989 return HasMovrel;
990 }
991
992 bool hasVGPRIndexMode() const {
993 return HasVGPRIndexMode;
994 }
995
996 bool useVGPRIndexMode() const;
997
998 bool hasScalarCompareEq64() const {
1000 }
1001
1003
1004 bool hasScalarStores() const {
1005 return HasScalarStores;
1006 }
1007
1008 bool hasScalarAtomics() const {
1009 return HasScalarAtomics;
1010 }
1011
1012 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1013 bool hasLDSFPAtomicAddF64() const { return GFX90AInsts; }
1014
1015 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1016 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1017
1018 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1019 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1020
1021 bool hasDPP() const {
1022 return HasDPP;
1023 }
1024
1025 bool hasDPPBroadcasts() const {
1026 return HasDPP && getGeneration() < GFX10;
1027 }
1028
1030 return HasDPP && getGeneration() < GFX10;
1031 }
1032
1033 bool hasDPP8() const {
1034 return HasDPP8;
1035 }
1036
1037 bool hasDPALU_DPP() const {
1038 return HasDPALU_DPP;
1039 }
1040
1041 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1042
1043 bool hasPackedFP32Ops() const {
1044 return HasPackedFP32Ops;
1045 }
1046
1047 // Has V_PK_MOV_B32 opcode
1048 bool hasPkMovB32() const {
1049 return GFX90AInsts;
1050 }
1051
1053 return getGeneration() >= GFX10 || hasGFX940Insts();
1054 }
1055
1056 bool hasImageInsts() const {
1057 return HasImageInsts;
1058 }
1059
1061 return HasExtendedImageInsts;
1062 }
1063
1064 bool hasR128A16() const {
1065 return HasR128A16;
1066 }
1067
1068 bool hasA16() const { return HasA16; }
1069
1070 bool hasG16() const { return HasG16; }
1071
1072 bool hasOffset3fBug() const {
1073 return HasOffset3fBug;
1074 }
1075
1077
1079
1080 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1081
1083
1085
1086 bool hasNSAEncoding() const { return HasNSAEncoding; }
1087
1088 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1089
1091
1092 unsigned getNSAMaxSize(bool HasSampler = false) const {
1093 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1094 }
1095
1096 bool hasGFX10_AEncoding() const {
1097 return GFX10_AEncoding;
1098 }
1099
1100 bool hasGFX10_BEncoding() const {
1101 return GFX10_BEncoding;
1102 }
1103
1104 bool hasGFX10_3Insts() const {
1105 return GFX10_3Insts;
1106 }
1107
1108 bool hasMadF16() const;
1109
1110 bool hasMovB64() const { return GFX940Insts; }
1111
1112 bool hasLshlAddB64() const { return GFX940Insts; }
1113
1114 bool enableSIScheduler() const {
1115 return EnableSIScheduler;
1116 }
1117
1118 bool loadStoreOptEnabled() const {
1119 return EnableLoadStoreOpt;
1120 }
1121
1122 bool hasSGPRInitBug() const {
1123 return SGPRInitBug;
1124 }
1125
1127 return UserSGPRInit16Bug && isWave32();
1128 }
1129
1131
1134 }
1135
1138 }
1139
1142 }
1143
1144 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1146 return CIInsts;
1147 }
1148
1151 }
1152
1156 }
1157
1160 }
1161
1164 }
1165
1168 }
1169
1172 }
1173
1176 }
1177
1178 bool hasLDSMisalignedBug() const {
1179 return LDSMisalignedBug && !EnableCuMode;
1180 }
1181
1183 return HasInstFwdPrefetchBug;
1184 }
1185
1187 return HasVcmpxExecWARHazard;
1188 }
1189
1192 }
1193
1194 // The shift amount of a 64-bit shift cannot be the highest allocated register
1195 // if that register is also at the end of the allocation block.
1197 return GFX90AInsts && !GFX940Insts;
1198 }
1199
1200 // Has one cycle hazard on transcendental instruction feeding a
1201 // non transcendental VALU.
1202 bool hasTransForwardingHazard() const { return GFX940Insts; }
1203
1204 // Has one cycle hazard on a VALU instruction partially writing dst with
1205 // a shift of result bits feeding another VALU instruction.
1207
1208 // Cannot use op_sel with v_dot instructions.
1209 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1210
1211 // Does not have HW interlocks for VALU writing and then reading SGPRs.
1212 bool hasVDecCoExecHazard() const {
1213 return GFX940Insts;
1214 }
1215
1216 bool hasNSAtoVMEMBug() const {
1217 return HasNSAtoVMEMBug;
1218 }
1219
1220 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1221
1222 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1223
1224 bool hasGFX90AInsts() const { return GFX90AInsts; }
1225
1227 return getGeneration() == GFX10;
1228 }
1229
1230 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1231
1232 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1233
1234 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1235
1237 return getGeneration() == GFX11;
1238 }
1239
1241
1243
1244 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1245
1246 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1247
1248 /// Return if operations acting on VGPR tuples require even alignment.
1249 bool needsAlignedVGPRs() const { return GFX90AInsts; }
1250
1251 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1252 bool hasSPackHL() const { return GFX11Insts; }
1253
1254 /// Return true if the target's EXP instruction has the COMPR flag, which
1255 /// affects the meaning of the EN (enable) bits.
1256 bool hasCompressedExport() const { return !GFX11Insts; }
1257
1258 /// Return true if the target's EXP instruction supports the NULL export
1259 /// target.
1260 bool hasNullExportTarget() const { return !GFX11Insts; }
1261
1262 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1263
1264 bool hasVOPDInsts() const { return HasVOPDInsts; }
1265
1267
1268 /// Return true if the target has the S_DELAY_ALU instruction.
1269 bool hasDelayAlu() const { return GFX11Insts; }
1270
1271 bool hasPackedTID() const { return HasPackedTID; }
1272
1273 // GFX940 is derived from GFX90A. hasGFX940Insts() being true implies that
1274 // hasGFX90AInsts() is also true.
1275 bool hasGFX940Insts() const { return GFX940Insts; }
1276
1277 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1278
1280
1282
1284
1285 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1286 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1287 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1288
1289 /// \returns The maximum number of instructions that can be enclosed in an
1290 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1291 /// instruction.
1292 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1293
1294 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1295 /// SGPRs
1296 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1297
1298 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1299 /// VGPRs
1300 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1301
1302 /// Return the occupancy for the given function, taking the LDS size and the
1303 /// number of registers into account if they are provided.
1304 /// Note, occupancy can be affected by the scratch allocation as well, but
1305 /// we do not have enough information to compute it.
1306 unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1307 unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1308
1309 /// \returns true if the flat_scratch register should be initialized with the
1310 /// pointer to the wave's scratch memory rather than a size and offset.
1313 }
1314
1315 /// \returns true if the flat_scratch register is initialized by the HW.
1316 /// In this case it is readonly.
1318
1319 /// \returns true if the architected SGPRs are enabled.
1321
1322 /// \returns true if Global Data Share is supported.
1323 bool hasGDS() const { return HasGDS; }
1324
1325 /// \returns true if Global Wave Sync is supported.
1326 bool hasGWS() const { return HasGWS; }
1327
1328 /// \returns true if the machine has merged shaders in which s0-s7 are
1329 /// reserved by the hardware and user SGPRs start at s8
1330 bool hasMergedShaders() const {
1331 return getGeneration() >= GFX9;
1332 }
1333
1334 // \returns true if the target supports the pre-NGG legacy geometry path.
1335 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1336
1337 // \returns true if preloading kernel arguments is supported.
1338 bool hasKernargPreload() const { return KernargPreload; }
1339
1340 // \returns true if the target has split barriers feature
1341 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1342
1343 // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1344 bool hasCvtFP8VOP1Bug() const { return true; }
1345
1346 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1347 // no-return form.
1349
1350 // \returns true if the target has DX10_CLAMP kernel descriptor mode bit
1351 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1352
1353 // \returns true if the target has IEEE kernel descriptor mode bit
1354 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1355
1356 // \returns true if the target has IEEE fminimum/fmaximum instructions
1357 bool hasIEEEMinMax() const { return getGeneration() >= GFX12; }
1358
1359 // \returns true if the target has IEEE fminimum3/fmaximum3 instructions
1360 bool hasIEEEMinMax3() const { return hasIEEEMinMax(); }
1361
1362 // \returns true if the target has WG_RR_MODE kernel descriptor mode bit
1363 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1364
1365 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1366 /// values.
1367 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1368
1369 // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1370 // of sign-extending.
1371 bool hasGetPCZeroExtension() const { return GFX12Insts; }
1372
1373 /// \returns SGPR allocation granularity supported by the subtarget.
1374 unsigned getSGPRAllocGranule() const {
1376 }
1377
1378 /// \returns SGPR encoding granularity supported by the subtarget.
1379 unsigned getSGPREncodingGranule() const {
1381 }
1382
1383 /// \returns Total number of SGPRs supported by the subtarget.
1384 unsigned getTotalNumSGPRs() const {
1386 }
1387
1388 /// \returns Addressable number of SGPRs supported by the subtarget.
1389 unsigned getAddressableNumSGPRs() const {
1391 }
1392
1393 /// \returns Minimum number of SGPRs that meets the given number of waves per
1394 /// execution unit requirement supported by the subtarget.
1395 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1396 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1397 }
1398
1399 /// \returns Maximum number of SGPRs that meets the given number of waves per
1400 /// execution unit requirement supported by the subtarget.
1401 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1402 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1403 }
1404
1405 /// \returns Reserved number of SGPRs. This is common
1406 /// utility function called by MachineFunction and
1407 /// Function variants of getReservedNumSGPRs.
1408 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1409 /// \returns Reserved number of SGPRs for given machine function \p MF.
1410 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1411
1412 /// \returns Reserved number of SGPRs for given function \p F.
1413 unsigned getReservedNumSGPRs(const Function &F) const;
1414
1415 /// \returns max num SGPRs. This is the common utility
1416 /// function called by MachineFunction and Function
1417 /// variants of getMaxNumSGPRs.
1418 unsigned getBaseMaxNumSGPRs(const Function &F,
1419 std::pair<unsigned, unsigned> WavesPerEU,
1420 unsigned PreloadedSGPRs,
1421 unsigned ReservedNumSGPRs) const;
1422
1423 /// \returns Maximum number of SGPRs that meets number of waves per execution
1424 /// unit requirement for function \p MF, or number of SGPRs explicitly
1425 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1426 ///
1427 /// \returns Value that meets number of waves per execution unit requirement
1428 /// if explicitly requested value cannot be converted to integer, violates
1429 /// subtarget's specifications, or does not meet number of waves per execution
1430 /// unit requirement.
1431 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1432
1433 /// \returns Maximum number of SGPRs that meets number of waves per execution
1434 /// unit requirement for function \p F, or number of SGPRs explicitly
1435 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1436 ///
1437 /// \returns Value that meets number of waves per execution unit requirement
1438 /// if explicitly requested value cannot be converted to integer, violates
1439 /// subtarget's specifications, or does not meet number of waves per execution
1440 /// unit requirement.
1441 unsigned getMaxNumSGPRs(const Function &F) const;
1442
1443 /// \returns VGPR allocation granularity supported by the subtarget.
1444 unsigned getVGPRAllocGranule() const {
1446 }
1447
1448 /// \returns VGPR encoding granularity supported by the subtarget.
1449 unsigned getVGPREncodingGranule() const {
1451 }
1452
1453 /// \returns Total number of VGPRs supported by the subtarget.
1454 unsigned getTotalNumVGPRs() const {
1456 }
1457
1458 /// \returns Addressable number of architectural VGPRs supported by the
1459 /// subtarget.
1462 }
1463
1464 /// \returns Addressable number of VGPRs supported by the subtarget.
1465 unsigned getAddressableNumVGPRs() const {
1467 }
1468
1469 /// \returns the minimum number of VGPRs that will prevent achieving more than
1470 /// the specified number of waves \p WavesPerEU.
1471 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1472 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1473 }
1474
1475 /// \returns the maximum number of VGPRs that can be used and still achieved
1476 /// at least the specified number of waves \p WavesPerEU.
1477 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1478 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1479 }
1480
1481 /// \returns max num VGPRs. This is the common utility function
1482 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1483 unsigned getBaseMaxNumVGPRs(const Function &F,
1484 std::pair<unsigned, unsigned> WavesPerEU) const;
1485 /// \returns Maximum number of VGPRs that meets number of waves per execution
1486 /// unit requirement for function \p F, or number of VGPRs explicitly
1487 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1488 ///
1489 /// \returns Value that meets number of waves per execution unit requirement
1490 /// if explicitly requested value cannot be converted to integer, violates
1491 /// subtarget's specifications, or does not meet number of waves per execution
1492 /// unit requirement.
1493 unsigned getMaxNumVGPRs(const Function &F) const;
1494
1495 unsigned getMaxNumAGPRs(const Function &F) const {
1496 return getMaxNumVGPRs(F);
1497 }
1498
1499 /// \returns Maximum number of VGPRs that meets number of waves per execution
1500 /// unit requirement for function \p MF, or number of VGPRs explicitly
1501 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1502 ///
1503 /// \returns Value that meets number of waves per execution unit requirement
1504 /// if explicitly requested value cannot be converted to integer, violates
1505 /// subtarget's specifications, or does not meet number of waves per execution
1506 /// unit requirement.
1507 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1508
1509 void getPostRAMutations(
1510 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1511 const override;
1512
1513 std::unique_ptr<ScheduleDAGMutation>
1515
1516 bool isWave32() const {
1517 return getWavefrontSize() == 32;
1518 }
1519
1520 bool isWave64() const {
1521 return getWavefrontSize() == 64;
1522 }
1523
1525 return getRegisterInfo()->getBoolRC();
1526 }
1527
1528 /// \returns Maximum number of work groups per compute unit supported by the
1529 /// subtarget and limited by given \p FlatWorkGroupSize.
1530 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1531 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1532 }
1533
1534 /// \returns Minimum flat work group size supported by the subtarget.
1535 unsigned getMinFlatWorkGroupSize() const override {
1537 }
1538
1539 /// \returns Maximum flat work group size supported by the subtarget.
1540 unsigned getMaxFlatWorkGroupSize() const override {
1542 }
1543
1544 /// \returns Number of waves per execution unit required to support the given
1545 /// \p FlatWorkGroupSize.
1546 unsigned
1547 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1548 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1549 }
1550
1551 /// \returns Minimum number of waves per execution unit supported by the
1552 /// subtarget.
1553 unsigned getMinWavesPerEU() const override {
1555 }
1556
1557 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1558 SDep &Dep,
1559 const TargetSchedModel *SchedModel) const override;
1560
1561 // \returns true if it's beneficial on this subtarget for the scheduler to
1562 // cluster stores as well as loads.
1563 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1564
1565 // \returns the number of address arguments from which to enable MIMG NSA
1566 // on supported architectures.
1567 unsigned getNSAThreshold(const MachineFunction &MF) const;
1568
1569 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1570 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1572 // Currently all targets that support the dealloc VGPRs message also require
1573 // the nop.
1574 return true;
1575 }
1576};
1577
1579public:
1580 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1581
1582 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1583
1584 bool hasDispatchPtr() const { return DispatchPtr; }
1585
1586 bool hasQueuePtr() const { return QueuePtr; }
1587
1588 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1589
1590 bool hasDispatchID() const { return DispatchID; }
1591
1592 bool hasFlatScratchInit() const { return FlatScratchInit; }
1593
1594 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1595
1596 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1597
1598 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1599
1600 unsigned getNumFreeUserSGPRs();
1601
1602 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1603
1604 enum UserSGPRID : unsigned {
1614
1615 // Returns the size in number of SGPRs for preload user SGPR field.
1617 switch (ID) {
1619 return 2;
1621 return 4;
1622 case DispatchPtrID:
1623 return 2;
1624 case QueuePtrID:
1625 return 2;
1627 return 2;
1628 case DispatchIdID:
1629 return 2;
1630 case FlatScratchInitID:
1631 return 2;
1633 return 1;
1634 }
1635 llvm_unreachable("Unknown UserSGPRID.");
1636 }
1637
1638 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1639
1640private:
1641 const GCNSubtarget &ST;
1642
1643 // Private memory buffer
1644 // Compute directly in sgpr[0:1]
1645 // Other shaders indirect 64-bits at sgpr[0:1]
1646 bool ImplicitBufferPtr = false;
1647
1648 bool PrivateSegmentBuffer = false;
1649
1650 bool DispatchPtr = false;
1651
1652 bool QueuePtr = false;
1653
1654 bool KernargSegmentPtr = false;
1655
1656 bool DispatchID = false;
1657
1658 bool FlatScratchInit = false;
1659
1660 bool PrivateSegmentSize = false;
1661
1662 unsigned NumKernargPreloadSGPRs = 0;
1663
1664 unsigned NumUsedUserSGPRs = 0;
1665};
1666
1667} // end namespace llvm
1668
1669#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
const char LLVMTargetMachineRef TM
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
Definition: GCNSubtarget.h:939
bool hasMemoryAtomicFaddF32DenormalSupport() const
Definition: GCNSubtarget.h:882
bool hasFlat() const
Definition: GCNSubtarget.h:384
bool hasD16Images() const
Definition: GCNSubtarget.h:695
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:64
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:844
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:744
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:227
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:487
bool hasSwap() const
Definition: GCNSubtarget.h:442
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:824
bool HasAtomicFMinFMaxF64FlatInsts
Definition: GCNSubtarget.h:165
bool hasDot2Insts() const
Definition: GCNSubtarget.h:774
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:687
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:748
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:568
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:998
bool has1_5xVGPRs() const
int getLDSBankCount() const
Definition: GCNSubtarget.h:339
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:390
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:476
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:467
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:960
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
Definition: GCNSubtarget.h:840
bool hasLdsAtomicAddF64() const
Definition: GCNSubtarget.h:673
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:683
bool flatScratchIsPointer() const
bool hasSDWAMac() const
Definition: GCNSubtarget.h:756
bool hasFP8ConversionInsts() const
Definition: GCNSubtarget.h:822
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:794
bool hasApertureRegs() const
Definition: GCNSubtarget.h:596
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:66
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:471
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:626
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:931
bool hasAtomicFMinFMaxF32FlatInsts() const
Definition: GCNSubtarget.h:836
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
Definition: GCNSubtarget.h:893
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:907
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:964
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
bool hasDLInsts() const
Definition: GCNSubtarget.h:764
bool hasExtendedImageInsts() const
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:410
bool hasMAIInsts() const
Definition: GCNSubtarget.h:814
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
Definition: GCNSubtarget.h:889
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:634
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:675
bool hasArchitectedSGPRs() const
bool hasHWFP64() const
Definition: GCNSubtarget.h:368
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:526
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:911
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:584
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:265
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:75
bool hasDot1Insts() const
Definition: GCNSubtarget.h:770
bool hasDot3Insts() const
Definition: GCNSubtarget.h:778
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:285
bool HasVGPRSingleUseHintInsts
Definition: GCNSubtarget.h:218
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:562
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:852
unsigned getTotalNumSGPRs() const
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:310
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
bool HasShaderCyclesHiLoRegisters
Definition: GCNSubtarget.h:196
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
Definition: GCNSubtarget.h:952
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:552
bool hasScalarSubwordLoads() const
Definition: GCNSubtarget.h:454
bool hasDot11Insts() const
Definition: GCNSubtarget.h:810
bool enableFlatScratch() const
Definition: GCNSubtarget.h:651
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:572
bool hasR128A16() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasSignedScratchOffsets() const
bool HasPrivEnabledTrap2NopBug
Definition: GCNSubtarget.h:235
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:656
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:622
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:460
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:608
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:872
bool hasKernargPreload() const
bool hasFP8Insts() const
Definition: GCNSubtarget.h:818
unsigned getMaxNumAGPRs(const Function &F) const
unsigned getVGPRAllocGranule() const
bool hasReadM0MovRelInterpHazard() const
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:277
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
bool hasFmaakFmamkF32Insts() const
bool hasVscnt() const
Definition: GCNSubtarget.h:903
bool hasMad64_32() const
Definition: GCNSubtarget.h:740
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:289
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:117
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:536
bool hasExtendedWaitCounts() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:691
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:768
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:935
unsigned maxHardClauseLength() const
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:736
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
Definition: GCNSubtarget.h:664
bool hasDPP() const
bool hasVINTERPEncoding() const
Definition: GCNSubtarget.h:668
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:297
bool hasLegacyGeometry() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:456
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:612
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:269
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:588
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:306
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:760
bool hasAtomicFMinFMaxF32GlobalInsts() const
Definition: GCNSubtarget.h:828
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:301
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:647
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:512
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:927
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:580
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:426
bool hasIntClamp() const
Definition: GCNSubtarget.h:356
bool hasGFX10_AEncoding() const
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:679
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:508
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:273
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:790
bool hasGFX940Insts() const
bool hasLshlAddB64() const
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:376
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:600
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:954
bool hasLDSFPAtomicAddF64() const
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:167
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:630
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
Definition: GCNSubtarget.h:162
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:965
bool HasAtomicFMinFMaxF32FlatInsts
Definition: GCNSubtarget.h:164
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
Definition: GCNSubtarget.h:729
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:335
bool hasShaderCyclesHiLoRegisters() const
Definition: GCNSubtarget.h:919
bool hasSDWASdst() const
Definition: GCNSubtarget.h:752
bool HasDefaultComponentBroadcast
Definition: GCNSubtarget.h:180
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:446
bool hasFFBL() const
Definition: GCNSubtarget.h:414
bool hasNSAEncoding() const
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:984
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:618
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:558
bool hasMovB64() const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
Definition: GCNSubtarget.h:832
bool hasMed3_16() const
Definition: GCNSubtarget.h:422
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasMovrel() const
Definition: GCNSubtarget.h:988
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:846
bool hasBFI() const
Definition: GCNSubtarget.h:402
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
Definition: GCNSubtarget.h:701
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:224
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:992
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:172
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:576
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:343
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool hasFMA() const
Definition: GCNSubtarget.h:438
bool hasDot10Insts() const
Definition: GCNSubtarget.h:806
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:521
bool HasAtomicBufferPkAddBF16Inst
Definition: GCNSubtarget.h:175
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
Definition: GCNSubtarget.h:418
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:645
bool supportsWGP() const
Definition: GCNSubtarget.h:354
bool hasG16() const
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:372
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:848
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:171
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:204
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:856
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:364
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
Definition: GCNSubtarget.h:868
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
Definition: GCNSubtarget.h:179
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
Definition: GCNSubtarget.h:786
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:980
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:854
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
Definition: GCNSubtarget.h:184
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
Definition: GCNSubtarget.h:876
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:406
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:546
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
Definition: GCNSubtarget.h:798
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:450
bool hasSCmpK() const
Definition: GCNSubtarget.h:942
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:293
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:541
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
Definition: GCNSubtarget.h:163
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:530
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:247
Generation getGeneration() const
Definition: GCNSubtarget.h:316
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasForceStoreSC0SC1() const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:923
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:860
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:899
unsigned getAddressableNumVGPRs() const
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:604
bool hasScalarAddSub64() const
Definition: GCNSubtarget.h:727
bool hasIEEEMinMax3() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:731
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:971
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:493
bool hasRFEHazards() const
Definition: GCNSubtarget.h:503
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:499
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:641
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:715
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:864
bool hasAddr64() const
Definition: GCNSubtarget.h:380
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:223
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:174
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:592
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:482
bool isWave64() const
bool hasIEEEMinMax() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:430
bool hasCARRY() const
Definition: GCNSubtarget.h:434
bool hasPackedTID() const
bool hasFP64() const
Definition: GCNSubtarget.h:360
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:723
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:915
bool hasSALUFloatInsts() const
bool hasVGPRSingleUseHintInsts() const
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:94
bool hasFractBug() const
Definition: GCNSubtarget.h:394
bool isPreciseMemoryEnabled() const
Definition: GCNSubtarget.h:616
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:320
bool HasMemoryAtomicFaddF32DenormalSupport
Definition: GCNSubtarget.h:170
bool hasDot4Insts() const
Definition: GCNSubtarget.h:782
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
~GCNSubtarget() override
bool hasDot9Insts() const
Definition: GCNSubtarget.h:802
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:660
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:62
bool hasDefaultComponentBroadcast() const
Definition: GCNSubtarget.h:895
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:281
bool hasBFE() const
Definition: GCNSubtarget.h:398
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:710
bool HasFlatBufferGlobalAtomicFaddF64Inst
Definition: GCNSubtarget.h:177
static unsigned getNumUserSGPRForField(UserSGPRID ID)
bool hasKernargSegmentPtr() const
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
bool hasImplicitBufferPtr() const
unsigned getNumKernargPreloadSGPRs() const
bool hasPrivateSegmentSize() const
unsigned getNumUsedUserSGPRs() const
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition: ScheduleDAG.h:49
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:222
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool isShader(CallingConv::ID cc)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.