LLVM 20.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // The following two enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
// Trap handler ABI kinds. The enumerators carry explicit values (0x00 and
// 0x01), so their numeric encoding does not depend on declaration order.
40 enum class TrapHandlerAbi {
41 NONE = 0x00,
42 AMDHSA = 0x01,
43 };
44
45 enum class TrapID {
46 LLVMAMDHSATrap = 0x02,
48 };
49
50private:
51 /// SelectionDAGISel related APIs.
52 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
53
54 /// GlobalISel related APIs.
55 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
56 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
57 std::unique_ptr<InstructionSelector> InstSelector;
58 std::unique_ptr<LegalizerInfo> Legalizer;
59 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
60
61protected:
62 // Basic subtarget description.
65 unsigned Gen = INVALID;
67 int LDSBankCount = 0;
69
70 // Possibly statically set by tablegen, but may want to be overridden.
71 bool FastDenormalF32 = false;
72 bool HalfRate64Ops = false;
73 bool FullRate64Ops = false;
74
75 // Dynamically set bits that enable features.
76 bool FlatForGlobal = false;
78 bool BackOffBarrier = false;
80 bool UnalignedAccessMode = false;
81 bool HasApertureRegs = false;
82 bool SupportsXNACK = false;
83 bool KernargPreload = false;
84
85 // This should not be used directly. 'TargetID' tracks the dynamic settings
86 // for XNACK.
87 bool EnableXNACK = false;
88
89 bool EnableTgSplit = false;
90 bool EnableCuMode = false;
91 bool TrapHandler = false;
92 bool EnablePreciseMemory = false;
93
94 // Used as options.
95 bool EnableLoadStoreOpt = false;
97 bool EnableSIScheduler = false;
98 bool EnableDS128 = false;
99 bool EnablePRTStrictNull = false;
100 bool DumpCode = false;
101
102 // Subtarget properties statically set by tablegen.
103 bool FP64 = false;
104 bool FMA = false;
105 bool MIMG_R128 = false;
106 bool CIInsts = false;
107 bool GFX8Insts = false;
108 bool GFX9Insts = false;
109 bool GFX90AInsts = false;
110 bool GFX940Insts = false;
111 bool GFX950Insts = false;
112 bool GFX10Insts = false;
113 bool GFX11Insts = false;
114 bool GFX12Insts = false;
115 bool GFX10_3Insts = false;
116 bool GFX7GFX8GFX9Insts = false;
117 bool SGPRInitBug = false;
118 bool UserSGPRInit16Bug = false;
121 bool HasSMemRealTime = false;
122 bool HasIntClamp = false;
123 bool HasFmaMixInsts = false;
124 bool HasMovrel = false;
125 bool HasVGPRIndexMode = false;
127 bool HasScalarStores = false;
128 bool HasScalarAtomics = false;
129 bool HasSDWAOmod = false;
130 bool HasSDWAScalar = false;
131 bool HasSDWASdst = false;
132 bool HasSDWAMac = false;
133 bool HasSDWAOutModsVOPC = false;
134 bool HasDPP = false;
135 bool HasDPP8 = false;
136 bool HasDPALU_DPP = false;
137 bool HasDPPSrc1SGPR = false;
138 bool HasPackedFP32Ops = false;
139 bool HasImageInsts = false;
141 bool HasR128A16 = false;
142 bool HasA16 = false;
143 bool HasG16 = false;
144 bool HasNSAEncoding = false;
146 bool GFX10_AEncoding = false;
147 bool GFX10_BEncoding = false;
148 bool HasDLInsts = false;
149 bool HasFmacF64Inst = false;
150 bool HasDot1Insts = false;
151 bool HasDot2Insts = false;
152 bool HasDot3Insts = false;
153 bool HasDot4Insts = false;
154 bool HasDot5Insts = false;
155 bool HasDot6Insts = false;
156 bool HasDot7Insts = false;
157 bool HasDot8Insts = false;
158 bool HasDot9Insts = false;
159 bool HasDot10Insts = false;
160 bool HasDot11Insts = false;
161 bool HasDot12Insts = false;
162 bool HasDot13Insts = false;
163 bool HasMAIInsts = false;
164 bool HasFP8Insts = false;
166 bool HasCvtFP8Vop1Bug = false;
167 bool HasPkFmacF16Inst = false;
187 bool HasXF32Insts = false;
188 /// The maximum number of instructions that may be placed within an S_CLAUSE,
189 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
190 /// indicates a lack of S_CLAUSE support.
192 bool SupportsSRAMECC = false;
193
194 // This should not be used directly. 'TargetID' tracks the dynamic settings
195 // for SRAMECC.
196 bool EnableSRAMECC = false;
197
198 bool HasNoSdstCMPX = false;
199 bool HasVscnt = false;
200 bool HasGetWaveIdInst = false;
201 bool HasSMemTimeInst = false;
204 bool HasVOP3Literal = false;
205 bool HasNoDataDepHazard = false;
206 bool FlatAddressSpace = false;
207 bool FlatInstOffsets = false;
208 bool FlatGlobalInsts = false;
209 bool FlatScratchInsts = false;
212 bool EnableFlatScratch = false;
214 bool HasGDS = false;
215 bool HasGWS = false;
216 bool AddNoCarryInsts = false;
217 bool HasUnpackedD16VMem = false;
218 bool LDSMisalignedBug = false;
221 bool UnalignedDSAccess = false;
222 bool HasPackedTID = false;
223 bool ScalarizeGlobal = false;
224 bool HasSALUFloatInsts = false;
227 bool HasBitOp3Insts = false;
228 bool HasPrngInst = false;
229 bool HasPermlane16Swap = false;
230 bool HasPermlane32Swap = false;
237 bool HasNSAtoVMEMBug = false;
238 bool HasNSAClauseBug = false;
239 bool HasOffset3fBug = false;
245 bool Has1_5xVGPRs = false;
246 bool HasMADIntraFwdBug = false;
247 bool HasVOPDInsts = false;
252 bool HasAshrPkInsts = false;
256
257 bool RequiresCOV6 = false;
258
259 // Dummy feature to use for assembler in tablegen.
260 bool FeatureDisable = false;
261
262private:
263 SIInstrInfo InstrInfo;
264 SITargetLowering TLInfo;
265 SIFrameLowering FrameLowering;
266
267public:
268 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
269 const GCNTargetMachine &TM);
270 ~GCNSubtarget() override;
271
273 StringRef GPU, StringRef FS);
274
275 /// Diagnose inconsistent subtarget features before attempting to codegen
276 /// function \p F.
277 void checkSubtargetFeatures(const Function &F) const;
278
/// \returns the address of the InstrInfo member embedded in this subtarget;
/// the pointee is owned by (and lives as long as) this object.
279 const SIInstrInfo *getInstrInfo() const override {
280 return &InstrInfo;
281 }
282
/// \returns the address of the FrameLowering member embedded in this
/// subtarget; the pointee is owned by this object.
283 const SIFrameLowering *getFrameLowering() const override {
284 return &FrameLowering;
285 }
286
/// \returns the address of the TLInfo member embedded in this subtarget; the
/// pointee is owned by this object.
287 const SITargetLowering *getTargetLowering() const override {
288 return &TLInfo;
289 }
290
/// \returns the register info object exposed by the embedded InstrInfo
/// member (the address of whatever InstrInfo.getRegisterInfo() yields).
291 const SIRegisterInfo *getRegisterInfo() const override {
292 return &InstrInfo.getRegisterInfo();
293 }
294
295 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
296
/// \returns the raw pointer held by the CallLoweringInfo std::unique_ptr.
/// Ownership stays with this subtarget. NOTE(review): the result is null if
/// CallLoweringInfo has not been populated -- where it is set is not visible
/// here.
297 const CallLowering *getCallLowering() const override {
298 return CallLoweringInfo.get();
299 }
300
/// \returns the raw pointer held by the InlineAsmLoweringInfo
/// std::unique_ptr. Ownership stays with this subtarget.
301 const InlineAsmLowering *getInlineAsmLowering() const override {
302 return InlineAsmLoweringInfo.get();
303 }
304
306 return InstSelector.get();
307 }
308
/// \returns the raw pointer held by the Legalizer std::unique_ptr. Ownership
/// stays with this subtarget.
309 const LegalizerInfo *getLegalizerInfo() const override {
310 return Legalizer.get();
311 }
312
/// \returns the raw pointer held by the RegBankInfo std::unique_ptr.
/// Ownership stays with this subtarget.
313 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
314 return RegBankInfo.get();
315 }
316
318 return TargetID;
319 }
320
322 return &InstrItins;
323 }
324
326
328 return (Generation)Gen;
329 }
330
/// \returns the largest wave scratch size that the generation's size field
/// can encode: (granule in dwords * 4) * (largest field value).
/// NOTE(review): the "* 4" factor suggests the result is in bytes (4 bytes
/// per dword) -- confirm against callers.
331 unsigned getMaxWaveScratchSize() const {
332 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
333 if (getGeneration() >= GFX12) {
334 // 18-bit field in units of 64-dword.
335 return (64 * 4) * ((1 << 18) - 1);
336 }
337 if (getGeneration() == GFX11) {
338 // 15-bit field in units of 64-dword.
339 return (64 * 4) * ((1 << 15) - 1);
340 }
341 // Every other generation: 13-bit field in units of 256-dword.
342 return (256 * 4) * ((1 << 13) - 1);
343 }
344
345 /// Return the number of high bits known to be zero for a frame index.
348 }
349
/// \returns the value of the LDSBankCount member (default-initialized to 0 in
/// this class).
350 int getLDSBankCount() const {
351 return LDSBankCount;
352 }
353
/// \returns MaxPrivateElementSize when \p ForBufferRSrc is true or flat
/// scratch is not enabled; otherwise (flat scratch enabled and not querying
/// for a buffer resource) the fixed value 16.
354 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
355 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
356 }
357
358 unsigned getConstantBusLimit(unsigned Opcode) const;
359
360 /// \returns true if an instruction that produces a 16-bit result in a 32-bit
361 /// register implicitly zeroes the high 16 bits of that register, rather than
362 /// preserving their original value.
363 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
364
/// \returns true when the subtarget generation compares >= GFX10.
365 bool supportsWGP() const { return getGeneration() >= GFX10; }
366
367 bool hasIntClamp() const {
368 return HasIntClamp;
369 }
370
371 bool hasFP64() const {
372 return FP64;
373 }
374
375 bool hasMIMG_R128() const {
376 return MIMG_R128;
377 }
378
/// \returns the FP64 flag. This is the same value hasFP64() reports; the two
/// accessors are currently indistinguishable.
379 bool hasHWFP64() const {
380 return FP64;
381 }
382
383 bool hasHalfRate64Ops() const {
384 return HalfRate64Ops;
385 }
386
387 bool hasFullRate64Ops() const {
388 return FullRate64Ops;
389 }
390
391 bool hasAddr64() const {
393 }
394
395 bool hasFlat() const {
397 }
398
399 // Return true if the target only has the reverse operand versions of VALU
400 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
401 bool hasOnlyRevVALUShifts() const {
403 }
404
405 bool hasFractBug() const {
407 }
408
409 bool hasBFE() const {
410 return true;
411 }
412
413 bool hasBFI() const {
414 return true;
415 }
416
/// \returns the same answer as hasBFE(); BFM availability is tied to BFE.
417 bool hasBFM() const {
418 return hasBFE();
419 }
420
/// \returns true unconditionally. \p Size is accepted but not inspected.
421 bool hasBCNT(unsigned Size) const {
422 return true;
423 }
424
425 bool hasFFBL() const {
426 return true;
427 }
428
429 bool hasFFBH() const {
430 return true;
431 }
432
433 bool hasMed3_16() const {
435 }
436
437 bool hasMin3Max3_16() const {
439 }
440
441 bool hasFmaMixInsts() const {
442 return HasFmaMixInsts;
443 }
444
445 bool hasCARRY() const {
446 return true;
447 }
448
449 bool hasFMA() const {
450 return FMA;
451 }
452
453 bool hasSwap() const {
454 return GFX9Insts;
455 }
456
457 bool hasScalarPackInsts() const {
458 return GFX9Insts;
459 }
460
461 bool hasScalarMulHiInsts() const {
462 return GFX9Insts;
463 }
464
465 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
466
469 }
470
472 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
473 return getGeneration() >= GFX9;
474 }
475
476 /// True if the offset field of DS instructions works as expected. On SI, the
477 /// offset uses a 16-bit adder and does not always wrap properly.
478 bool hasUsableDSOffset() const {
479 return getGeneration() >= SEA_ISLANDS;
480 }
481
484 }
485
486 /// Condition output from div_scale is usable.
489 }
490
491 /// Extra wait hazard is needed in some cases before
492 /// s_cbranch_vccnz/s_cbranch_vccz.
493 bool hasReadVCCZBug() const {
494 return getGeneration() <= SEA_ISLANDS;
495 }
496
497 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
499 return getGeneration() >= GFX10;
500 }
501
502 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
503 /// was written by a VALU instruction.
506 }
507
508 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
509 /// SGPR was written by a VALU Instruction.
512 }
513
514 bool hasRFEHazards() const {
516 }
517
518 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
/// \returns 1 for generations up to and including SEA_ISLANDS, and 2 for
/// every later generation.
519 unsigned getSetRegWaitStates() const {
520 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
521 }
522
523 bool dumpCode() const {
524 return DumpCode;
525 }
526
527 /// Return the amount of LDS that can be used that will not restrict the
528 /// occupancy lower than WaveCount.
529 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
530 const Function &) const;
531
534 }
535
536 /// \returns If target supports S_DENORM_MODE.
537 bool hasDenormModeInst() const {
539 }
540
541 bool useFlatForGlobal() const {
542 return FlatForGlobal;
543 }
544
545 /// \returns If target supports ds_read/write_b128 and user enables generation
546 /// of ds_read/write_b128.
547 bool useDS128() const {
548 return CIInsts && EnableDS128;
549 }
550
551 /// \return If target supports ds_read/write_b96/128.
552 bool hasDS96AndDS128() const {
553 return CIInsts;
554 }
555
556 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
557 bool haveRoundOpsF64() const {
558 return CIInsts;
559 }
560
561 /// \returns If MUBUF instructions always perform range checking, even for
562 /// buffer resources used for private memory access.
565 }
566
567 /// \returns true if the target requires PRT Strict NULL support (zero result
568 /// registers for sparse texture support).
569 bool usePRTStrictNull() const {
570 return EnablePRTStrictNull;
571 }
572
575 }
576
577 /// \returns true if the target supports backing off of s_barrier instructions
578 /// when an exception is raised.
580 return BackOffBarrier;
581 }
582
585 }
586
589 }
590
591 bool hasUnalignedDSAccess() const {
592 return UnalignedDSAccess;
593 }
594
597 }
598
601 }
602
605 }
606
608 return UnalignedAccessMode;
609 }
610
611 bool hasApertureRegs() const {
612 return HasApertureRegs;
613 }
614
615 bool isTrapHandlerEnabled() const {
616 return TrapHandler;
617 }
618
619 bool isXNACKEnabled() const {
620 return TargetID.isXnackOnOrAny();
621 }
622
623 bool isTgSplitEnabled() const {
624 return EnableTgSplit;
625 }
626
627 bool isCuModeEnabled() const {
628 return EnableCuMode;
629 }
630
632
633 bool hasFlatAddressSpace() const {
634 return FlatAddressSpace;
635 }
636
/// \returns the same answer as hasFlatAddressSpace(); no separate feature bit
/// is consulted.
637 bool hasFlatScrRegister() const {
638 return hasFlatAddressSpace();
639 }
640
641 bool hasFlatInstOffsets() const {
642 return FlatInstOffsets;
643 }
644
645 bool hasFlatGlobalInsts() const {
646 return FlatGlobalInsts;
647 }
648
649 bool hasFlatScratchInsts() const {
650 return FlatScratchInsts;
651 }
652
653 // Check if target supports ST addressing mode with FLAT scratch instructions.
654 // The ST addressing mode means no registers are used, either VGPR or SGPR,
655 // but only immediate offset is swizzled and added to the FLAT scratch base.
656 bool hasFlatScratchSTMode() const {
658 }
659
/// \returns true when either the GFX940Insts or the GFX11Insts flag is set.
660 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
661
664 }
665
666 bool enableFlatScratch() const {
667 return flatScratchIsArchitected() ||
669 }
670
671 bool hasGlobalAddTidInsts() const {
672 return GFX10_BEncoding;
673 }
674
675 bool hasAtomicCSub() const {
676 return GFX10_BEncoding;
677 }
678
/// \returns true unless hasGFX940Insts() is true, i.e. export instructions
/// are reported as unavailable exactly when the GFX940 instruction set is
/// present.
679 bool hasExportInsts() const {
680 return !hasGFX940Insts();
681 }
682
683 bool hasVINTERPEncoding() const {
684 return GFX11Insts;
685 }
686
687 // DS_ADD_F64/DS_ADD_RTN_F64
688 bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); }
689
691 return getGeneration() >= GFX9;
692 }
693
696 }
697
699 return getGeneration() > GFX9;
700 }
701
702 bool hasD16LoadStore() const {
703 return getGeneration() >= GFX9;
704 }
705
708 }
709
710 bool hasD16Images() const {
712 }
713
714 /// \returns true if most LDS instructions have an m0 use that requires m0 to
715 /// be initialized.
716 bool ldsRequiresM0Init() const {
717 return getGeneration() < GFX9;
718 }
719
720 // True if the hardware rewinds and replays GWS operations if a wave is
721 // preempted.
722 //
723 // If this is false, a GWS operation requires testing if a nack set the
724 // MEM_VIOL bit, and repeating if so.
725 bool hasGWSAutoReplay() const {
726 return getGeneration() >= GFX9;
727 }
728
729 /// \returns if target has ds_gws_sema_release_all instruction.
730 bool hasGWSSemaReleaseAll() const {
731 return CIInsts;
732 }
733
734 /// \returns true if the target has integer add/sub instructions that do not
735 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
736 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
737 /// for saturation.
738 bool hasAddNoCarry() const {
739 return AddNoCarryInsts;
740 }
741
742 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
743
744 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
745
746 bool hasUnpackedD16VMem() const {
747 return HasUnpackedD16VMem;
748 }
749
750 // Covers VS/PS/CS graphics shaders
/// \returns true when isMesa3DOS() holds and the calling convention of \p F
/// is classified as a shader by AMDGPU::isShader().
751 bool isMesaGfxShader(const Function &F) const {
752 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
753 }
754
755 bool hasMad64_32() const {
756 return getGeneration() >= SEA_ISLANDS;
757 }
758
759 bool hasSDWAOmod() const {
760 return HasSDWAOmod;
761 }
762
763 bool hasSDWAScalar() const {
764 return HasSDWAScalar;
765 }
766
767 bool hasSDWASdst() const {
768 return HasSDWASdst;
769 }
770
771 bool hasSDWAMac() const {
772 return HasSDWAMac;
773 }
774
775 bool hasSDWAOutModsVOPC() const {
776 return HasSDWAOutModsVOPC;
777 }
778
779 bool hasDLInsts() const {
780 return HasDLInsts;
781 }
782
783 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
784
785 bool hasDot1Insts() const {
786 return HasDot1Insts;
787 }
788
789 bool hasDot2Insts() const {
790 return HasDot2Insts;
791 }
792
793 bool hasDot3Insts() const {
794 return HasDot3Insts;
795 }
796
797 bool hasDot4Insts() const {
798 return HasDot4Insts;
799 }
800
801 bool hasDot5Insts() const {
802 return HasDot5Insts;
803 }
804
805 bool hasDot6Insts() const {
806 return HasDot6Insts;
807 }
808
809 bool hasDot7Insts() const {
810 return HasDot7Insts;
811 }
812
813 bool hasDot8Insts() const {
814 return HasDot8Insts;
815 }
816
817 bool hasDot9Insts() const {
818 return HasDot9Insts;
819 }
820
821 bool hasDot10Insts() const {
822 return HasDot10Insts;
823 }
824
825 bool hasDot11Insts() const {
826 return HasDot11Insts;
827 }
828
829 bool hasDot12Insts() const {
830 return HasDot12Insts;
831 }
832
833 bool hasDot13Insts() const {
834 return HasDot13Insts;
835 }
836
837 bool hasMAIInsts() const {
838 return HasMAIInsts;
839 }
840
841 bool hasFP8Insts() const {
842 return HasFP8Insts;
843 }
844
846
847 bool hasPkFmacF16Inst() const {
848 return HasPkFmacF16Inst;
849 }
850
853 }
854
857 }
858
861 }
862
865 }
866
868
870
871 bool hasAtomicFaddInsts() const {
873 }
874
876
878
881 }
882
885 }
886
889 }
890
893 }
894
896
897 /// \return true if the target has flat, global, and buffer atomic fadd for
898 /// double.
901 }
902
903 /// \return true if the target's flat, global, and buffer atomic fadd for
904 /// float supports denormal handling.
907 }
908
909 /// \return true if atomic operations targeting fine-grained memory work
910 /// correctly at device scope, in allocations in host or peer PCIe device
911 /// memory.
914 }
915
917
920 }
921
922 bool hasNoSdstCMPX() const {
923 return HasNoSdstCMPX;
924 }
925
926 bool hasVscnt() const {
927 return HasVscnt;
928 }
929
930 bool hasGetWaveIdInst() const {
931 return HasGetWaveIdInst;
932 }
933
934 bool hasSMemTimeInst() const {
935 return HasSMemTimeInst;
936 }
937
940 }
941
944 }
945
946 bool hasVOP3Literal() const {
947 return HasVOP3Literal;
948 }
949
950 bool hasNoDataDepHazard() const {
951 return HasNoDataDepHazard;
952 }
953
955 return getGeneration() < SEA_ISLANDS;
956 }
957
/// \returns true only when the generation is exactly GFX10 or exactly GFX11;
/// both earlier and later generations report false.
958 bool hasInstPrefetch() const {
959 return getGeneration() == GFX10 || getGeneration() == GFX11;
960 }
961
962 bool hasPrefetch() const { return GFX12Insts; }
963
964 // Has s_cmpk_* instructions.
965 bool hasSCmpK() const { return getGeneration() < GFX12; }
966
967 // Scratch is allocated in 256 dword per wave blocks for the entire
968 // wavefront. When viewed from the perspective of an arbitrary workitem, this
969 // is 4-byte aligned.
970 //
971 // Only 4-byte alignment is really needed to access anything. Transformations
972 // on the pointer value itself may rely on the alignment / known low bits of
973 // the pointer. Set this to something above the minimum to avoid needing
974 // dynamic realignment in common cases.
975 Align getStackAlignment() const { return Align(16); }
976
977 bool enableMachineScheduler() const override {
978 return true;
979 }
980
981 bool useAA() const override;
982
983 bool enableSubRegLiveness() const override {
984 return true;
985 }
986
989
990 // static wrappers
991 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
992
993 // XXX - Why is this here if it isn't in the default pass set?
994 bool enableEarlyIfConversion() const override {
995 return true;
996 }
997
999 unsigned NumRegionInstrs) const override;
1000
1001 void mirFileLoaded(MachineFunction &MF) const override;
1002
/// \returns the result of AMDGPU::getMaxNumUserSGPRs() evaluated for this
/// subtarget; no additional logic is applied here.
1003 unsigned getMaxNumUserSGPRs() const {
1004 return AMDGPU::getMaxNumUserSGPRs(*this);
1005 }
1006
1007 bool hasSMemRealTime() const {
1008 return HasSMemRealTime;
1009 }
1010
1011 bool hasMovrel() const {
1012 return HasMovrel;
1013 }
1014
1015 bool hasVGPRIndexMode() const {
1016 return HasVGPRIndexMode;
1017 }
1018
1019 bool useVGPRIndexMode() const;
1020
1022 return getGeneration() >= VOLCANIC_ISLANDS;
1023 }
1024
1026
1027 bool hasScalarStores() const {
1028 return HasScalarStores;
1029 }
1030
1031 bool hasScalarAtomics() const {
1032 return HasScalarAtomics;
1033 }
1034
1035 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1036 bool hasLDSFPAtomicAddF64() const { return GFX90AInsts; }
1037
1038 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1039 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1040
1041 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1042 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1043
1044 bool hasDPP() const {
1045 return HasDPP;
1046 }
1047
/// \returns true when the HasDPP flag is set and the generation compares
/// < GFX10.
1048 bool hasDPPBroadcasts() const {
1049 return HasDPP && getGeneration() < GFX10;
1050 }
1051
1053 return HasDPP && getGeneration() < GFX10;
1054 }
1055
1056 bool hasDPP8() const {
1057 return HasDPP8;
1058 }
1059
1060 bool hasDPALU_DPP() const {
1061 return HasDPALU_DPP;
1062 }
1063
1064 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1065
1066 bool hasPackedFP32Ops() const {
1067 return HasPackedFP32Ops;
1068 }
1069
1070 // Has V_PK_MOV_B32 opcode
1071 bool hasPkMovB32() const {
1072 return GFX90AInsts;
1073 }
1074
1076 return getGeneration() >= GFX10 || hasGFX940Insts();
1077 }
1078
1079 bool hasImageInsts() const {
1080 return HasImageInsts;
1081 }
1082
1084 return HasExtendedImageInsts;
1085 }
1086
1087 bool hasR128A16() const {
1088 return HasR128A16;
1089 }
1090
1091 bool hasA16() const { return HasA16; }
1092
1093 bool hasG16() const { return HasG16; }
1094
1095 bool hasOffset3fBug() const {
1096 return HasOffset3fBug;
1097 }
1098
1100
1102
1103 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1104
1106
1108
1109 bool hasNSAEncoding() const { return HasNSAEncoding; }
1110
1111 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1112
1114
/// \returns the result of AMDGPU::getNSAMaxSize() for this subtarget.
/// \p HasSampler is passed through unchanged and defaults to false.
1115 unsigned getNSAMaxSize(bool HasSampler = false) const {
1116 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1117 }
1118
1119 bool hasGFX10_AEncoding() const {
1120 return GFX10_AEncoding;
1121 }
1122
1123 bool hasGFX10_BEncoding() const {
1124 return GFX10_BEncoding;
1125 }
1126
1127 bool hasGFX10_3Insts() const {
1128 return GFX10_3Insts;
1129 }
1130
1131 bool hasMadF16() const;
1132
1133 bool hasMovB64() const { return GFX940Insts; }
1134
1135 bool hasLshlAddB64() const { return GFX940Insts; }
1136
1137 bool enableSIScheduler() const {
1138 return EnableSIScheduler;
1139 }
1140
1141 bool loadStoreOptEnabled() const {
1142 return EnableLoadStoreOpt;
1143 }
1144
1145 bool hasSGPRInitBug() const {
1146 return SGPRInitBug;
1147 }
1148
1150 return UserSGPRInit16Bug && isWave32();
1151 }
1152
1154
1157 }
1158
1161 }
1162
1165 }
1166
1167 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1169 return CIInsts;
1170 }
1171
1174 }
1175
1179 }
1180
1183 }
1184
1187 }
1188
1191 }
1192
1195 }
1196
1199 }
1200
/// \returns true when the LDSMisalignedBug flag is set and CU mode
/// (EnableCuMode) is off; with CU mode enabled the bug is reported as absent.
1201 bool hasLDSMisalignedBug() const {
1202 return LDSMisalignedBug && !EnableCuMode;
1203 }
1204
1206 return HasInstFwdPrefetchBug;
1207 }
1208
1210 return HasVcmpxExecWARHazard;
1211 }
1212
1215 }
1216
1217 // Shift amount of a 64 bit shift cannot be a highest allocated register
1218 // if also at the end of the allocation block.
1220 return GFX90AInsts && !GFX940Insts;
1221 }
1222
1223 // Has one cycle hazard on transcendental instruction feeding a
1224 // non transcendental VALU.
1225 bool hasTransForwardingHazard() const { return GFX940Insts; }
1226
1227 // Has one cycle hazard on a VALU instruction partially writing dst with
1228 // a shift of result bits feeding another VALU instruction.
1230
1231 // Cannot use op_sel with v_dot instructions.
1232 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1233
1234 // Does not have HW interlocks for VALU writing and then reading SGPRs.
1235 bool hasVDecCoExecHazard() const {
1236 return GFX940Insts;
1237 }
1238
1239 bool hasNSAtoVMEMBug() const {
1240 return HasNSAtoVMEMBug;
1241 }
1242
1243 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1244
/// \returns true when MaxHardClauseLength is non-zero.
1245 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1246
1247 bool hasGFX90AInsts() const { return GFX90AInsts; }
1248
1250 return getGeneration() == GFX10;
1251 }
1252
1253 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1254
1255 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1256
1257 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1258
1260 return getGeneration() == GFX11;
1261 }
1262
1264
1266
1268
1269 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1270
1271 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1272
1273 bool hasVALUReadSGPRHazard() const { return getGeneration() == GFX12; }
1274
1275 /// Return if operations acting on VGPR tuples require even alignment.
1276 bool needsAlignedVGPRs() const { return GFX90AInsts; }
1277
1278 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1279 bool hasSPackHL() const { return GFX11Insts; }
1280
1281 /// Return true if the target's EXP instruction has the COMPR flag, which
1282 /// affects the meaning of the EN (enable) bits.
1283 bool hasCompressedExport() const { return !GFX11Insts; }
1284
1285 /// Return true if the target's EXP instruction supports the NULL export
1286 /// target.
1287 bool hasNullExportTarget() const { return !GFX11Insts; }
1288
1289 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1290
1291 bool hasVOPDInsts() const { return HasVOPDInsts; }
1292
1294
1295 /// Return true if the target has the S_DELAY_ALU instruction.
1296 bool hasDelayAlu() const { return GFX11Insts; }
1297
1298 bool hasPackedTID() const { return HasPackedTID; }
1299
1300 // GFX940 is a derivative of GFX90A. hasGFX940Insts() being true implies that
1301 // hasGFX90AInsts() is also true.
1302 bool hasGFX940Insts() const { return GFX940Insts; }
1303
1304 // GFX950 is a derivative of GFX940. hasGFX950Insts() being true implies that
1305 // hasGFX940Insts() and hasGFX90AInsts() are also true.
1306 bool hasGFX950Insts() const { return GFX950Insts; }
1307
1308 /// Returns true if the target supports
1309 /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
1310 /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
1311 bool hasLDSLoadB96_B128() const {
1312 return hasGFX950Insts();
1313 }
1314
1315 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1316
1318
1320
1322
1324
1325 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1326 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1327 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1328
1329 /// \returns true if inline constants are not supported for F16 pseudo
1330 /// scalar transcendentals.
1332 return getGeneration() == GFX12;
1333 }
1334
1335 /// \returns true if the target has instructions with xf32 format support.
1336 bool hasXF32Insts() const { return HasXF32Insts; }
1337
1338 bool hasBitOp3Insts() const { return HasBitOp3Insts; }
1339
1340 bool hasPermlane16Swap() const { return HasPermlane16Swap; }
1341 bool hasPermlane32Swap() const { return HasPermlane32Swap; }
1342 bool hasAshrPkInsts() const { return HasAshrPkInsts; }
1343
1346 }
1347
1350 }
1351
1354 }
1355
1356 /// \returns The maximum number of instructions that can be enclosed in an
1357 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1358 /// instruction.
1359 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1360
1361 bool hasPrngInst() const { return HasPrngInst; }
1362
1363 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1364 /// SGPRs
1365 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1366
1367 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1368 /// VGPRs
1369 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1370
1371 /// Return the occupancy for the given function, taking the LDS size and the
1372 /// register counts into account if they are provided.
1373 /// Note, occupancy can be affected by the scratch allocation as well, but
1374 /// we do not have enough information to compute it.
1375 unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1376 unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1377
1378 /// \returns true if the flat_scratch register should be initialized with the
1379 /// pointer to the wave's scratch memory rather than a size and offset.
1382 }
1383
1384 /// \returns true if the flat_scratch register is initialized by the HW.
1385 /// In this case it is readonly.
1387
1388 /// \returns true if the architected SGPRs are enabled.
1390
1391 /// \returns true if Global Data Share is supported.
1392 bool hasGDS() const { return HasGDS; }
1393
1394 /// \returns true if Global Wave Sync is supported.
1395 bool hasGWS() const { return HasGWS; }
1396
1397 /// \returns true if the machine has merged shaders in which s0-s7 are
1398 /// reserved by the hardware and user SGPRs start at s8
1399 bool hasMergedShaders() const {
1400 return getGeneration() >= GFX9;
1401 }
1402
1403 // \returns true if the target supports the pre-NGG legacy geometry path.
1404 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1405
1406 // \returns true if preloading kernel arguments is supported.
1407 bool hasKernargPreload() const { return KernargPreload; }
1408
1409 // \returns true if the target has split barriers feature
1410 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1411
1412 // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1413 bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
1414
1415 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1416 // no-return form.
1418
1419 // \returns true if the target has DX10_CLAMP kernel descriptor mode bit
1420 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1421
1422 // \returns true if the target has IEEE kernel descriptor mode bit
1423 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1424
1425 // \returns true if the target has IEEE fminimum/fmaximum instructions
1426 bool hasIEEEMinMax() const { return getGeneration() >= GFX12; }
1427
1428 // \returns true if the target has IEEE fminimum3/fmaximum3 instructions
1429 bool hasIEEEMinMax3() const { return hasIEEEMinMax(); }
1430
1431 // \returns true if the target has WG_RR_MODE kernel descriptor mode bit
1432 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1433
1434 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1435 /// values.
1436 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1437
1438 // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1439 // of sign-extending.
1440 bool hasGetPCZeroExtension() const { return GFX12Insts; }
1441
1442 /// \returns SGPR allocation granularity supported by the subtarget.
1443 unsigned getSGPRAllocGranule() const {
1445 }
1446
1447 /// \returns SGPR encoding granularity supported by the subtarget.
1448 unsigned getSGPREncodingGranule() const {
1450 }
1451
1452 /// \returns Total number of SGPRs supported by the subtarget.
1453 unsigned getTotalNumSGPRs() const {
1455 }
1456
1457 /// \returns Addressable number of SGPRs supported by the subtarget.
1458 unsigned getAddressableNumSGPRs() const {
1460 }
1461
1462 /// \returns Minimum number of SGPRs that meets the given number of waves per
1463 /// execution unit requirement supported by the subtarget.
     /// Thin wrapper: forwards this subtarget and \p WavesPerEU to
     /// AMDGPU::IsaInfo::getMinNumSGPRs.
1464 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1465 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1466 }
1467
1468 /// \returns Maximum number of SGPRs that meets the given number of waves per
1469 /// execution unit requirement supported by the subtarget.
     /// Thin wrapper: forwards this subtarget, \p WavesPerEU and \p Addressable
     /// unchanged to AMDGPU::IsaInfo::getMaxNumSGPRs.
     /// NOTE(review): \p Addressable presumably limits the result to addressable
     /// SGPRs; confirm against the IsaInfo implementation.
1470 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1471 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1472 }
1473
1474 /// \returns Reserved number of SGPRs. This is the common
1475 /// utility function called by the MachineFunction and
1476 /// Function variants of getReservedNumSGPRs.
     /// NOTE(review): \p HasFlatScratch presumably says whether flat scratch is
     /// in use for the function being queried; confirm against the definition.
1477 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1478 /// \returns Reserved number of SGPRs for given machine function \p MF.
     /// Declared here only; the definition lives outside this header.
1479 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1480
1481 /// \returns Reserved number of SGPRs for given function \p F.
     /// Declared here only; the definition lives outside this header.
1482 unsigned getReservedNumSGPRs(const Function &F) const;
1483
1484 /// \returns max num SGPRs. This is the common utility
1485 /// function called by the MachineFunction and Function
1486 /// variants of getMaxNumSGPRs.
     /// NOTE(review): \p WavesPerEU is presumably a (min, max) waves-per-EU pair
     /// and \p PreloadedSGPRs / \p ReservedNumSGPRs counts supplied by the
     /// calling variant; confirm against the out-of-line definition.
1487 unsigned getBaseMaxNumSGPRs(const Function &F,
1488 std::pair<unsigned, unsigned> WavesPerEU,
1489 unsigned PreloadedSGPRs,
1490 unsigned ReservedNumSGPRs) const;
1491
1492 /// \returns Maximum number of SGPRs that meets number of waves per execution
1493 /// unit requirement for function \p MF, or number of SGPRs explicitly
1494 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1495 ///
1496 /// Falls back to the value that meets number of waves per execution unit requirement
1497 /// if explicitly requested value cannot be converted to integer, violates
1498 /// subtarget's specifications, or does not meet number of waves per execution
1499 /// unit requirement.
1500 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1501
1502 /// \returns Maximum number of SGPRs that meets number of waves per execution
1503 /// unit requirement for function \p F, or number of SGPRs explicitly
1504 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1505 ///
1506 /// Falls back to the value that meets number of waves per execution unit requirement
1507 /// if explicitly requested value cannot be converted to integer, violates
1508 /// subtarget's specifications, or does not meet number of waves per execution
1509 /// unit requirement.
1510 unsigned getMaxNumSGPRs(const Function &F) const;
1511
1512 /// \returns VGPR allocation granularity supported by the subtarget.
1513 unsigned getVGPRAllocGranule() const {
1515 }
1516
1517 /// \returns VGPR encoding granularity supported by the subtarget.
1518 unsigned getVGPREncodingGranule() const {
1520 }
1521
1522 /// \returns Total number of VGPRs supported by the subtarget.
1523 unsigned getTotalNumVGPRs() const {
1525 }
1526
1527 /// \returns Addressable number of architectural VGPRs supported by the
1528 /// subtarget.
1531 }
1532
1533 /// \returns Addressable number of VGPRs supported by the subtarget.
1534 unsigned getAddressableNumVGPRs() const {
1536 }
1537
1538 /// \returns the minimum number of VGPRs that will prevent achieving more than
1539 /// the specified number of waves \p WavesPerEU.
     /// Thin wrapper: forwards this subtarget and \p WavesPerEU to
     /// AMDGPU::IsaInfo::getMinNumVGPRs.
1540 unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1541 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1542 }
1543
1544 /// \returns the maximum number of VGPRs that can be used and still achieve
1545 /// at least the specified number of waves \p WavesPerEU.
     /// Thin wrapper: forwards this subtarget and \p WavesPerEU to
     /// AMDGPU::IsaInfo::getMaxNumVGPRs.
1546 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1547 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1548 }
1549
1550 /// \returns max num VGPRs. This is the common utility function
1551 /// called by the MachineFunction and Function variants of getMaxNumVGPRs.
     /// NOTE(review): \p WavesPerEU is presumably a (min, max) waves-per-EU
     /// pair; confirm against the out-of-line definition.
1552 unsigned getBaseMaxNumVGPRs(const Function &F,
1553 std::pair<unsigned, unsigned> WavesPerEU) const;
1554 /// \returns Maximum number of VGPRs that meets number of waves per execution
1555 /// unit requirement for function \p F, or number of VGPRs explicitly
1556 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1557 ///
1558 /// Falls back to the value that meets number of waves per execution unit requirement
1559 /// if explicitly requested value cannot be converted to integer, violates
1560 /// subtarget's specifications, or does not meet number of waves per execution
1561 /// unit requirement.
1562 unsigned getMaxNumVGPRs(const Function &F) const;
1563
/// \returns the AGPR limit for function \p F, which is defined here to be the same value as getMaxNumVGPRs(F).
1564 unsigned getMaxNumAGPRs(const Function &F) const {
1565 return getMaxNumVGPRs(F);
1566 }
1567
1568 /// \returns Maximum number of VGPRs that meets number of waves per execution
1569 /// unit requirement for function \p MF, or number of VGPRs explicitly
1570 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1571 ///
1572 /// Falls back to the value that meets number of waves per execution unit requirement
1573 /// if explicitly requested value cannot be converted to integer, violates
1574 /// subtarget's specifications, or does not meet number of waves per execution
1575 /// unit requirement.
1576 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1577
/// Override of the base-class hook; defined out of line. NOTE(review): presumably fills \p Mutations with this subtarget's post-RA scheduling DAG mutations; confirm.
1578 void getPostRAMutations(
1579 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1580 const override;
1581
1582 std::unique_ptr<ScheduleDAGMutation>
1584
/// \returns true if getWavefrontSize() reports a wavefront size of exactly 32.
1585 bool isWave32() const {
1586 return getWavefrontSize() == 32;
1587 }
1588
/// \returns true if getWavefrontSize() reports a wavefront size of exactly 64.
1589 bool isWave64() const {
1590 return getWavefrontSize() == 64;
1591 }
1592
1593 /// \returns true if the wave size of this subtarget is reliably known, i.e.
     /// when either the FeatureWavefrontSize32 or the FeatureWavefrontSize64
     /// subtarget feature is set. This is false
1594 /// only for a default target-cpu that does not have an explicit
1595 /// +wavefrontsize target feature.
1596 bool isWaveSizeKnown() const {
1597 return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
1598 hasFeature(AMDGPU::FeatureWavefrontSize64);
1599 }
1600
1602 return getRegisterInfo()->getBoolRC();
1603 }
1604
1605 /// \returns Maximum number of work groups per compute unit supported by the
1606 /// subtarget and limited by given \p FlatWorkGroupSize.
     /// Thin override: forwards this subtarget and \p FlatWorkGroupSize to
     /// AMDGPU::IsaInfo::getMaxWorkGroupsPerCU.
1607 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1608 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1609 }
1610
1611 /// \returns Minimum flat work group size supported by the subtarget.
1612 unsigned getMinFlatWorkGroupSize() const override {
1614 }
1615
1616 /// \returns Maximum flat work group size supported by the subtarget.
1617 unsigned getMaxFlatWorkGroupSize() const override {
1619 }
1620
1621 /// \returns Number of waves per execution unit required to support the given
1622 /// \p FlatWorkGroupSize.
     /// Thin override: forwards this subtarget and \p FlatWorkGroupSize to
     /// AMDGPU::IsaInfo::getWavesPerEUForWorkGroup.
1623 unsigned
1624 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1625 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1626 }
1627
1628 /// \returns Minimum number of waves per execution unit supported by the
1629 /// subtarget.
1630 unsigned getMinWavesPerEU() const override {
1632 }
1633
/// Override of the base-class scheduling hook; defined out of line. NOTE(review): presumably adjusts \p Dep for the edge from \p Def to \p Use; confirm.
1634 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1635 SDep &Dep,
1636 const TargetSchedModel *SchedModel) const override;
1637
1638 // \returns true if it's beneficial on this subtarget for the scheduler to
1639 // cluster stores as well as loads; keyed on generation GFX11 or higher.
1640 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1641
1642 // \returns the number of address arguments from which to enable MIMG NSA
1643 // on supported architectures, for machine function \p MF. Defined out of line.
1644 unsigned getNSAThreshold(const MachineFunction &MF) const;
1645
1646 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1647 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1649 // Currently all targets that support the dealloc VGPRs message also require
1650 // the nop.
1651 return true;
1652 }
1653
1655 // AMDGPU doesn't care if early-clobber and undef operands are allocated
1656 // to the same register.
1657 return false;
1658 }
1659};
1660
1662public:
/// \returns the current value of the ImplicitBufferPtr flag.
1663 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1664
/// \returns the current value of the PrivateSegmentBuffer flag.
1665 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1666
/// \returns the current value of the DispatchPtr flag.
1667 bool hasDispatchPtr() const { return DispatchPtr; }
1668
/// \returns the current value of the QueuePtr flag.
1669 bool hasQueuePtr() const { return QueuePtr; }
1670
/// \returns the current value of the KernargSegmentPtr flag.
1671 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1672
/// \returns the current value of the DispatchID flag.
1673 bool hasDispatchID() const { return DispatchID; }
1674
/// \returns the current value of the FlatScratchInit flag.
1675 bool hasFlatScratchInit() const { return FlatScratchInit; }
1676
/// \returns the current value of the PrivateSegmentSize flag.
1677 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1678
/// \returns the NumKernargPreloadSGPRs counter (initialized to 0 in its member declaration).
1679 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1680
/// \returns the NumUsedUserSGPRs counter (initialized to 0 in its member declaration).
1681 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1682
/// Defined out of line. NOTE(review): presumably the number of user SGPRs still unallocated; unlike the neighbouring getters this is not const-qualified - confirm whether that is intentional.
1683 unsigned getNumFreeUserSGPRs();
1684
/// Defined out of line. NOTE(review): presumably records \p NumSGPRs additional SGPRs as used for kernarg preloading; confirm against the definition.
1685 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1686
1687 enum UserSGPRID : unsigned {
1697
1698 // Returns the size in number of SGPRs for preload user SGPR field.
1700 switch (ID) {
1702 return 2;
1704 return 4;
1705 case DispatchPtrID:
1706 return 2;
1707 case QueuePtrID:
1708 return 2;
1710 return 2;
1711 case DispatchIdID:
1712 return 2;
1713 case FlatScratchInitID:
1714 return 2;
1716 return 1;
1717 }
1718 llvm_unreachable("Unknown UserSGPRID.");
1719 }
1720
/// Constructor; defined out of line. NOTE(review): presumably derives the user SGPR flags from \p F and binds the ST reference member to \p ST; confirm.
1721 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1722
1723private:
     // Subtarget held by reference. NOTE(review): presumably the ST passed to
     // the constructor - confirm.
1724 const GCNSubtarget &ST;
1725
1726 // Private memory buffer
1727 // Compute directly in sgpr[0:1]
1728 // Other shaders indirect 64-bits at sgpr[0:1]
     // Reported by hasImplicitBufferPtr().
1729 bool ImplicitBufferPtr = false;
1730
     // Reported by hasPrivateSegmentBuffer().
1731 bool PrivateSegmentBuffer = false;
1732
     // Reported by hasDispatchPtr().
1733 bool DispatchPtr = false;
1734
     // Reported by hasQueuePtr().
1735 bool QueuePtr = false;
1736
     // Reported by hasKernargSegmentPtr().
1737 bool KernargSegmentPtr = false;
1738
     // Reported by hasDispatchID().
1739 bool DispatchID = false;
1740
     // Reported by hasFlatScratchInit().
1741 bool FlatScratchInit = false;
1742
     // Reported by hasPrivateSegmentSize().
1743 bool PrivateSegmentSize = false;
1744
     // Reported by getNumKernargPreloadSGPRs(); starts at 0.
1745 unsigned NumKernargPreloadSGPRs = 0;
1746
     // Reported by getNumUsedUserSGPRs(); starts at 0.
1747 unsigned NumUsedUserSGPRs = 0;
1748};
1748};
1749
1750} // end namespace llvm
1751
1752#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
Definition: GCNSubtarget.h:962
bool hasMemoryAtomicFaddF32DenormalSupport() const
Definition: GCNSubtarget.h:905
bool hasFlat() const
Definition: GCNSubtarget.h:395
bool hasD16Images() const
Definition: GCNSubtarget.h:710
bool hasMinimum3Maximum3F32() const
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:66
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:867
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:759
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:236
bool hasPermlane32Swap() const
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:498
bool hasSwap() const
Definition: GCNSubtarget.h:453
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:847
bool HasAtomicFMinFMaxF64FlatInsts
Definition: GCNSubtarget.h:171
bool hasDot2Insts() const
Definition: GCNSubtarget.h:789
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:702
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:763
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:579
bool hasScalarCompareEq64() const
bool has1_5xVGPRs() const
int getLDSBankCount() const
Definition: GCNSubtarget.h:350
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:401
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:487
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:478
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:983
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
Definition: GCNSubtarget.h:863
bool hasLdsAtomicAddF64() const
Definition: GCNSubtarget.h:688
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:698
bool flatScratchIsPointer() const
bool hasSDWAMac() const
Definition: GCNSubtarget.h:771
bool hasFP8ConversionInsts() const
Definition: GCNSubtarget.h:845
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:809
bool hasApertureRegs() const
Definition: GCNSubtarget.h:611
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:68
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:482
bool hasBitOp3Insts() const
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:641
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:954
bool hasAtomicFMinFMaxF32FlatInsts() const
Definition: GCNSubtarget.h:859
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
Definition: GCNSubtarget.h:916
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:930
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:987
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
bool hasDLInsts() const
Definition: GCNSubtarget.h:779
bool hasExtendedImageInsts() const
bool hasVmemWriteVgprInOrder() const
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:421
bool hasMAIInsts() const
Definition: GCNSubtarget.h:837
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
Definition: GCNSubtarget.h:912
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:649
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:690
bool hasArchitectedSGPRs() const
bool hasHWFP64() const
Definition: GCNSubtarget.h:379
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:537
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
bool hasCvtScaleForwardingHazard() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:934
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:595
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:279
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:77
bool hasDot1Insts() const
Definition: GCNSubtarget.h:785
bool hasDot3Insts() const
Definition: GCNSubtarget.h:793
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:301
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:573
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:875
unsigned getTotalNumSGPRs() const
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:321
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
bool HasShaderCyclesHiLoRegisters
Definition: GCNSubtarget.h:203
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
Definition: GCNSubtarget.h:975
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:563
bool hasScalarSubwordLoads() const
Definition: GCNSubtarget.h:465
bool hasDot11Insts() const
Definition: GCNSubtarget.h:825
bool enableFlatScratch() const
Definition: GCNSubtarget.h:666
bool hasMadF16() const
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:583
bool hasR128A16() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasPrngInst() const
bool hasSignedScratchOffsets() const
bool HasPrivEnabledTrap2NopBug
Definition: GCNSubtarget.h:244
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:671
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:637
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:471
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:623
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:895
bool hasKernargPreload() const
bool hasFP8Insts() const
Definition: GCNSubtarget.h:841
unsigned getMaxNumAGPRs(const Function &F) const
unsigned getVGPRAllocGranule() const
bool hasReadM0MovRelInterpHazard() const
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:291
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
bool hasFmaakFmamkF32Insts() const
bool hasVscnt() const
Definition: GCNSubtarget.h:926
bool hasMad64_32() const
Definition: GCNSubtarget.h:755
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:305
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:120
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:547
bool hasExtendedWaitCounts() const
bool hasMinimum3Maximum3PKF16() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:706
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:783
bool hasXF32Insts() const
bool hasInstPrefetch() const
Definition: GCNSubtarget.h:958
unsigned maxHardClauseLength() const
bool hasAshrPkInsts() const
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:751
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
Definition: GCNSubtarget.h:679
bool hasDPP() const
bool hasVINTERPEncoding() const
Definition: GCNSubtarget.h:683
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:313
bool hasLegacyGeometry() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:467
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:627
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:283
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:599
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasMinimum3Maximum3F16() const
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:775
bool hasAtomicFMinFMaxF32GlobalInsts() const
Definition: GCNSubtarget.h:851
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
bool hasGFX950Insts() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:317
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:662
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:523
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:950
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:591
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:437
bool hasIntClamp() const
Definition: GCNSubtarget.h:367
bool hasGFX10_AEncoding() const
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:694
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:519
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:287
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:805
bool hasGFX940Insts() const
bool hasLshlAddB64() const
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:387
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:615
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:977
bool hasLDSFPAtomicAddF64() const
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:173
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:645
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
Definition: GCNSubtarget.h:168
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:988
bool HasAtomicFMinFMaxF32FlatInsts
Definition: GCNSubtarget.h:170
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
Definition: GCNSubtarget.h:744
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:346
bool hasShaderCyclesHiLoRegisters() const
Definition: GCNSubtarget.h:942
bool hasSDWASdst() const
Definition: GCNSubtarget.h:767
bool HasDefaultComponentBroadcast
Definition: GCNSubtarget.h:186
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:457
bool hasFFBL() const
Definition: GCNSubtarget.h:425
bool hasNSAEncoding() const
bool requiresDisjointEarlyClobberAndUndef() const override
bool hasVALUReadSGPRHazard() const
bool hasSMemRealTime() const
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:633
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:569
bool hasMovB64() const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
Definition: GCNSubtarget.h:855
bool hasMed3_16() const
Definition: GCNSubtarget.h:433
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasUnalignedScratchAccessEnabled() const
Definition: GCNSubtarget.h:603
bool hasMovrel() const
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:869
bool hasBFI() const
Definition: GCNSubtarget.h:413
bool hasDot13Insts() const
Definition: GCNSubtarget.h:833
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
Definition: GCNSubtarget.h:716
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:233
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:178
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:587
bool isWaveSizeKnown() const
Returns if the wavesize of this subtarget is known reliable.
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:354
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool HasRequiredExportPriority
Definition: GCNSubtarget.h:250
bool hasFMA() const
Definition: GCNSubtarget.h:449
bool hasDot10Insts() const
Definition: GCNSubtarget.h:821
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:532
bool HasAtomicBufferPkAddBF16Inst
Definition: GCNSubtarget.h:181
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
Definition: GCNSubtarget.h:429
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:660
bool supportsWGP() const
Definition: GCNSubtarget.h:365
bool hasG16() const
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:383
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:871
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:177
bool hasPermlane16Swap() const
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:211
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:879
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:375
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
Definition: GCNSubtarget.h:891
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
Definition: GCNSubtarget.h:185
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
Definition: GCNSubtarget.h:801
unsigned getMaxNumUserSGPRs() const
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:877
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
Definition: GCNSubtarget.h:191
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
Definition: GCNSubtarget.h:899
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:417
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:557
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
Definition: GCNSubtarget.h:813
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:461
bool hasSCmpK() const
Definition: GCNSubtarget.h:965
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:309
bool hasDot12Insts() const
Definition: GCNSubtarget.h:829
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:552
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
Definition: GCNSubtarget.h:169
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:541
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Generation getGeneration() const
Definition: GCNSubtarget.h:327
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasForceStoreSC0SC1() const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:946
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:883
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:922
unsigned getAddressableNumVGPRs() const
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:619
bool hasScalarAddSub64() const
Definition: GCNSubtarget.h:742
bool hasIEEEMinMax3() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:746
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:994
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:504
bool hasRFEHazards() const
Definition: GCNSubtarget.h:514
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:510
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:656
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:730
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:887
bool hasAddr64() const
Definition: GCNSubtarget.h:391
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:232
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:180
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:607
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:493
bool isWave64() const
bool hasIEEEMinMax() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:441
bool hasCARRY() const
Definition: GCNSubtarget.h:445
bool hasPackedTID() const
bool hasFP64() const
Definition: GCNSubtarget.h:371
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:738
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:938
bool hasSALUFloatInsts() const
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:96
bool hasFractBug() const
Definition: GCNSubtarget.h:405
bool isPreciseMemoryEnabled() const
Definition: GCNSubtarget.h:631
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:331
bool HasMemoryAtomicFaddF32DenormalSupport
Definition: GCNSubtarget.h:176
bool hasDot4Insts() const
Definition: GCNSubtarget.h:797
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
~GCNSubtarget() override
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasDot9Insts() const
Definition: GCNSubtarget.h:817
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:675
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:64
bool hasDefaultComponentBroadcast() const
Definition: GCNSubtarget.h:918
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:297
bool hasBFE() const
Definition: GCNSubtarget.h:409
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:725
bool HasFlatBufferGlobalAtomicFaddF64Inst
Definition: GCNSubtarget.h:183
static unsigned getNumUserSGPRForField(UserSGPRID ID)
bool hasKernargSegmentPtr() const
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
bool hasImplicitBufferPtr() const
unsigned getNumKernargPreloadSGPRs() const
bool hasPrivateSegmentSize() const
unsigned getNumUsedUserSGPRs() const
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition: ScheduleDAG.h:49
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:227
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:51
TargetInstrInfo - Interface to description of machine instruction set.
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool isShader(CallingConv::ID cc)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count the number of 0 bits from the most significant bit to the least, stopping at the first 1.
Definition: bit.h:281
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.