LLVM 23.0.0git
AMDGPUBaseInfo.h
Go to the documentation of this file.
1//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "AMDGPUSubtarget.h"
13#include "SIDefines.h"
15#include "llvm/IR/CallingConv.h"
16#include "llvm/IR/InstrTypes.h"
17#include "llvm/IR/Module.h"
19#include <array>
20#include <functional>
21#include <utility>
22
23// Pull in OpName enum definition and getNamedOperandIdx() declaration.
24#define GET_INSTRINFO_OPERAND_ENUM
25#include "AMDGPUGenInstrInfo.inc"
26
28
29namespace llvm {
30
31struct Align;
32class Argument;
33class Function;
34class GlobalValue;
35class MachineInstr;
36class MCInstrInfo;
37class MCRegisterClass;
38class MCRegisterInfo;
39class MCSubtargetInfo;
40class MDNode;
41class StringRef;
42class Triple;
43class raw_ostream;
44
45namespace AMDGPU {
46
47struct AMDGPUMCKernelCodeT;
48struct IsaVersion;
49
/// Version numbers of the generic targets emitted by this version of LLVM.
///
/// A number is bumped each time a codegen-breaking change lands within the
/// corresponding generic family.
namespace GenericVersion {
// Namespace-scope constexpr variables already have internal linkage, so no
// explicit storage-class specifier is needed.
constexpr unsigned GFX9 = 1;
constexpr unsigned GFX9_4 = 1;
constexpr unsigned GFX10_1 = 1;
constexpr unsigned GFX10_3 = 1;
constexpr unsigned GFX11 = 1;
constexpr unsigned GFX12 = 1;
constexpr unsigned GFX12_5 = 1;
} // namespace GenericVersion
63
/// AMDHSA code object version constants; each enumerator's value equals the
/// version number in its name.
enum {
  AMDHSA_COV4 = 4,
  AMDHSA_COV5 = 5,
  AMDHSA_COV6 = 6
};
65
/// Floating-point type selector; `None` is the zero value.
enum class FPType {
  None,
  FP4,
  FP8
};
67
68/// \returns True if \p STI is AMDHSA.
69bool isHsaAbi(const MCSubtargetInfo &STI);
70
71/// \returns Code object version from the IR module flag.
72unsigned getAMDHSACodeObjectVersion(const Module &M);
73
74/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
75unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
76
77/// \returns The default HSA code object version. This should only be used when
78/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
79/// flag or a .amdhsa_code_object_version directive)
81
82/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
83/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
84uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
85
86/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
87unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
88
89/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
90unsigned getHostcallImplicitArgPosition(unsigned COV);
91
92unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
93unsigned getCompletionActionImplicitArgPosition(unsigned COV);
94
96 unsigned Format;
97 unsigned BitsPerComp;
98 unsigned NumComponents;
99 unsigned NumFormat;
100 unsigned DataFormat;
101};
102
108
115
119
121 unsigned T16Op;
122 unsigned HiOp;
123 unsigned LoOp;
124};
125
130
131#define GET_MIMGBaseOpcode_DECL
132#define GET_MIMGDim_DECL
133#define GET_MIMGEncoding_DECL
134#define GET_MIMGLZMapping_DECL
135#define GET_MIMGMIPMapping_DECL
136#define GET_MIMGBiASMapping_DECL
137#define GET_MAIInstInfoTable_DECL
138#define GET_isMFMA_F8F6F4Table_DECL
139#define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
140#define GET_True16D16Table_DECL
141#define GET_WMMAInstInfoTable_DECL
142#include "AMDGPUGenSearchableTables.inc"
143
144namespace IsaInfo {
145
146enum {
147 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
148 // doesn't spill SGPRs as much as when 80 is set.
151};
152
154
156private:
157 const MCSubtargetInfo &STI;
158 TargetIDSetting XnackSetting;
159 TargetIDSetting SramEccSetting;
160
161public:
162 explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
163 ~AMDGPUTargetID() = default;
164
165 /// \return True if the current xnack setting is not "Unsupported".
166 bool isXnackSupported() const {
167 return XnackSetting != TargetIDSetting::Unsupported;
168 }
169
170 /// \returns True if the current xnack setting is "On" or "Any".
171 bool isXnackOnOrAny() const {
172 return XnackSetting == TargetIDSetting::On ||
173 XnackSetting == TargetIDSetting::Any;
174 }
175
176 /// \returns True if current xnack setting is "On" or "Off",
177 /// false otherwise.
182
183 /// \returns The current xnack TargetIDSetting, possible options are
184 /// "Unsupported", "Any", "Off", and "On".
185 TargetIDSetting getXnackSetting() const { return XnackSetting; }
186
187 /// Sets xnack setting to \p NewXnackSetting.
188 void setXnackSetting(TargetIDSetting NewXnackSetting) {
189 XnackSetting = NewXnackSetting;
190 }
191
192 /// \return True if the current sramecc setting is not "Unsupported".
193 bool isSramEccSupported() const {
194 return SramEccSetting != TargetIDSetting::Unsupported;
195 }
196
197 /// \returns True if the current sramecc setting is "On" or "Any".
198 bool isSramEccOnOrAny() const {
199 return SramEccSetting == TargetIDSetting::On ||
200 SramEccSetting == TargetIDSetting::Any;
201 }
202
203 /// \returns True if current sramecc setting is "On" or "Off",
204 /// false otherwise.
209
210 /// \returns The current sramecc TargetIDSetting, possible options are
211 /// "Unsupported", "Any", "Off", and "On".
212 TargetIDSetting getSramEccSetting() const { return SramEccSetting; }
213
214 /// Sets sramecc setting to \p NewSramEccSetting.
215 void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
216 SramEccSetting = NewSramEccSetting;
217 }
218
221
222 /// Write string representation to \p OS
223 void print(raw_ostream &OS) const;
224
225 /// \returns String representation of an object.
226 std::string toString() const;
227};
228
230 const AMDGPUTargetID &TargetID) {
231 TargetID.print(OS);
232 return OS;
233}
234
235/// \returns Wavefront size for given subtarget \p STI.
236unsigned getWavefrontSize(const MCSubtargetInfo *STI);
237
238/// \returns Local memory size in bytes for given subtarget \p STI.
239unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
240
241/// \returns Maximum addressable local memory size in bytes for given subtarget
242/// \p STI.
244
245/// \returns Number of execution units per compute unit for given subtarget \p
246/// STI.
247unsigned getEUsPerCU(const MCSubtargetInfo *STI);
248
249/// \returns Maximum number of work groups per compute unit for given subtarget
250/// \p STI and limited by given \p FlatWorkGroupSize.
251unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
252 unsigned FlatWorkGroupSize);
253
254/// \returns Minimum number of waves per execution unit for given subtarget \p
255/// STI.
256unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
257
258/// \returns Maximum number of waves per execution unit for given subtarget \p
259/// STI without any kind of limitation.
260unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
261
262/// \returns Number of waves per execution unit required to support the given \p
263/// FlatWorkGroupSize.
265 unsigned FlatWorkGroupSize);
266
267/// \returns Minimum flat work group size for given subtarget \p STI.
268unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
269
/// \returns The maximum flat work group size. This is a fixed value that does
/// not depend on any subtarget.
constexpr unsigned getMaxFlatWorkGroupSize() {
  // Some subtargets are able to encode 2048, but that configuration is
  // neither tested nor supported.
  constexpr unsigned MaxSize = 1024;
  return MaxSize;
}
275
276/// \returns Number of waves per work group for given subtarget \p STI and
277/// \p FlatWorkGroupSize.
278unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
279 unsigned FlatWorkGroupSize);
280
281/// \returns SGPR allocation granularity for given subtarget \p STI.
282unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
283
284/// \returns SGPR encoding granularity for given subtarget \p STI.
285unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
286
287/// \returns Total number of SGPRs for given subtarget \p STI.
288unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
289
290/// \returns Addressable number of SGPRs for given subtarget \p STI.
291unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
292
293/// \returns Minimum number of SGPRs that meets the given number of waves per
294/// execution unit requirement for given subtarget \p STI.
295unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
296
297/// \returns Maximum number of SGPRs that meets the given number of waves per
298/// execution unit requirement for given subtarget \p STI.
299unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
300 bool Addressable);
301
302/// \returns Number of extra SGPRs implicitly required by given subtarget \p
303/// STI when the given special registers are used.
304unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
305 bool FlatScrUsed, bool XNACKUsed);
306
307/// \returns Number of extra SGPRs implicitly required by given subtarget \p
308/// STI when the given special registers are used. XNACK is inferred from
309/// \p STI.
310unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
311 bool FlatScrUsed);
312
313/// \returns Number of SGPR blocks needed for given subtarget \p STI when
314/// \p NumSGPRs are used. \p NumSGPRs should already include any special
315/// register counts.
316unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
317
318/// \returns VGPR allocation granularity for given subtarget \p STI.
319///
320/// For subtargets which support it, \p EnableWavefrontSize32 should match
321/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
322unsigned
323getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize,
324 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
325
326/// \returns VGPR encoding granularity for given subtarget \p STI.
327///
328/// For subtargets which support it, \p EnableWavefrontSize32 should match
329/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
331 const MCSubtargetInfo *STI,
332 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
333
334/// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
335/// returns the allocation granule for ArchVGPRs.
336unsigned getArchVGPRAllocGranule();
337
338/// \returns Total number of VGPRs for given subtarget \p STI.
339unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
340
341/// \returns Addressable number of architectural VGPRs for a given subtarget \p
342/// STI.
344
345/// \returns Addressable number of VGPRs for given subtarget \p STI.
346unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
347 unsigned DynamicVGPRBlockSize);
348
349/// \returns Minimum number of VGPRs that meets given number of waves per
350/// execution unit requirement for given subtarget \p STI.
351unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
352 unsigned DynamicVGPRBlockSize);
353
354/// \returns Maximum number of VGPRs that meets given number of waves per
355/// execution unit requirement for given subtarget \p STI.
356unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
357 unsigned DynamicVGPRBlockSize);
358
359/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
360/// subtarget \p STI.
362 unsigned NumVGPRs,
363 unsigned DynamicVGPRBlockSize);
364
365/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
366/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
367unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
368 unsigned MaxWaves,
369 unsigned TotalNumVGPRs);
370
371/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
372/// Gen.
373unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
375
376/// \returns Number of VGPR blocks needed for given subtarget \p STI when
377/// \p NumVGPRs are used. We actually return the number of blocks -1, since
378/// that's what we encode.
379///
380/// For subtargets which support it, \p EnableWavefrontSize32 should match the
381/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
383 const MCSubtargetInfo *STI, unsigned NumVGPRs,
384 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
385
386/// \returns Number of VGPR blocks that need to be allocated for the given
387/// subtarget \p STI when \p NumVGPRs are used.
389 const MCSubtargetInfo *STI, unsigned NumVGPRs,
390 unsigned DynamicVGPRBlockSize,
391 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
392
393} // end namespace IsaInfo
394
395// Represents a field in an encoded value.
396template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
398 static_assert(HighBit >= LowBit, "Invalid bit range!");
399 static constexpr unsigned Offset = LowBit;
400 static constexpr unsigned Width = HighBit - LowBit + 1;
401
403 static constexpr ValueType Default = D;
404
407
408 constexpr uint64_t encode() const { return Value; }
409 static ValueType decode(uint64_t Encoded) { return Encoded; }
410};
411
412// Represents a single bit in an encoded value.
413template <unsigned Bit, unsigned D = 0>
415
// Helper that packs several fields into, and unpacks them from, a single
// 64-bit encoded value. Each type in Fields must provide encode(), a static
// decode(), Offset, Width and ValueType.
template <typename... Fields> struct EncodingFields {
  // Shift every field's encoded value to its Offset and OR the results
  // together.
  static constexpr uint64_t encode(Fields... Values) {
    return (... | (Values.encode() << Values.Offset));
  }

  // For every field, extract the Width bits starting at Offset and hand them
  // to that field's decode(); the results are returned in declaration order.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return std::tuple<typename Fields::ValueType...>{Fields::decode(
        (Encoded >> Fields::Offset) & maxUIntN(Fields::Width))...};
  }
};
427
429inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
430 return getNamedOperandIdx(Opcode, NamedIdx) != -1;
431}
432
435
456
459
461const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
462
472
474const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
475
478
481
483 MIMGBaseOpcode L;
484 MIMGBaseOpcode LZ;
485};
486
488 MIMGBaseOpcode MIP;
489 MIMGBaseOpcode NONMIP;
490};
491
493 MIMGBaseOpcode Bias;
494 MIMGBaseOpcode NoBias;
495};
496
498 MIMGBaseOpcode Offset;
499 MIMGBaseOpcode NoOffset;
500};
501
503 MIMGBaseOpcode G;
504 MIMGBaseOpcode G16;
505};
506
509
511 unsigned Opcode2Addr;
512 unsigned Opcode3Addr;
513};
514
517
520
523
526
528int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
529 unsigned VDataDwords, unsigned VAddrDwords);
530
532int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
533
535unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
536 const MIMGDimInfo *Dim, bool IsA16,
537 bool IsG16Supported);
538
547
549const MIMGInfo *getMIMGInfo(unsigned Opc);
550
552int getMTBUFBaseOpcode(unsigned Opc);
553
555int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
556
558int getMTBUFElements(unsigned Opc);
559
561bool getMTBUFHasVAddr(unsigned Opc);
562
564bool getMTBUFHasSrsrc(unsigned Opc);
565
567bool getMTBUFHasSoffset(unsigned Opc);
568
570int getMUBUFBaseOpcode(unsigned Opc);
571
573int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
574
576int getMUBUFElements(unsigned Opc);
577
579bool getMUBUFHasVAddr(unsigned Opc);
580
582bool getMUBUFHasSrsrc(unsigned Opc);
583
585bool getMUBUFHasSoffset(unsigned Opc);
586
588bool getMUBUFIsBufferInv(unsigned Opc);
589
591bool getMUBUFTfe(unsigned Opc);
592
594bool getSMEMIsBuffer(unsigned Opc);
595
597bool getVOP1IsSingle(unsigned Opc);
598
600bool getVOP2IsSingle(unsigned Opc);
601
603bool getVOP3IsSingle(unsigned Opc);
604
606bool isVOPC64DPP(unsigned Opc);
607
609bool isVOPCAsmOnly(unsigned Opc);
610
611/// Returns true if MAI operation is a double precision GEMM.
613bool getMAIIsDGEMM(unsigned Opc);
614
616bool getMAIIsGFX940XDL(unsigned Opc);
617
619bool getWMMAIsXDL(unsigned Opc);
620
621// Get an equivalent BitOp3 for a binary logical \p Opc.
622// \returns BitOp3 modifier for the logical operation or zero.
623// Used in VOPD3 conversion.
624unsigned getBitOp2(unsigned Opc);
625
/// Pair of flags returned by getCanBeVOPD().
/// NOTE(review): presumably X/Y say whether an opcode may be used as the X/Y
/// component of a VOPD instruction -- confirm against the implementation.
struct CanBeVOPD {
  bool X; // Flag for the X component.
  bool Y; // Flag for the Y component.
};
630
631/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
633unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
634
636CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);
637
639uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
640
643 unsigned BLGP,
644 unsigned F8F8Opcode);
645
648
651 unsigned FmtB,
652 unsigned F8F8Opcode);
653
656 uint8_t NumComponents,
657 uint8_t NumFormat,
658 const MCSubtargetInfo &STI);
661 const MCSubtargetInfo &STI);
662
664int32_t getMCOpcode(uint32_t Opcode, unsigned Gen);
665
667unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);
668
670int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
671 bool VOPD3);
672
674bool isVOPD(unsigned Opc);
675
677bool isMAC(unsigned Opc);
678
680bool isPermlane16(unsigned Opc);
681
683bool isGenericAtomic(unsigned Opc);
684
686bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
687
688namespace VOPD {
689
700
701// LSB mask for VGPR banks per VOPD component operand.
702// 4 banks result in a mask 3, setting 2 lower bits.
703constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
704constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};
705
706enum ComponentIndex : unsigned { X = 0, Y = 1 };
708constexpr unsigned COMPONENTS_NUM = 2;
709
710// Properties of VOPD components.
712private:
713 unsigned SrcOperandsNum = 0;
714 unsigned MandatoryLiteralIdx = ~0u;
715 bool HasSrc2Acc = false;
716 unsigned NumVOPD3Mods = 0;
717 unsigned Opcode = 0;
718 bool IsVOP3 = false;
719
720public:
721 ComponentProps() = default;
722 ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);
723
724 // Return the total number of src operands this component has.
725 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
726
727 // Return the number of src operands of this component visible to the parser.
729 return SrcOperandsNum - HasSrc2Acc;
730 }
731
732 // Return true iif this component has a mandatory literal.
733 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
734
735 // If this component has a mandatory literal, return component operand
736 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
739 return MandatoryLiteralIdx;
740 }
741
742 // Return true iif this component has operand
743 // with component index CompSrcIdx and this operand may be a register.
744 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
745 assert(CompSrcIdx < Component::MAX_SRC_NUM);
746 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
747 }
748
749 // Return true iff this component has tied src2.
750 bool hasSrc2Acc() const { return HasSrc2Acc; }
751
752 // Return a number of source modifiers if instruction is used in VOPD3.
753 unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }
754
755 // Return opcode of the component.
756 unsigned getOpcode() const { return Opcode; }
757
758 // Returns if component opcode is in VOP3 encoding.
759 unsigned isVOP3() const { return IsVOP3; }
760
761 // Return index of BitOp3 operand or -1.
762 int getBitOp3OperandIdx() const;
763
764private:
765 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
766 assert(CompSrcIdx < Component::MAX_SRC_NUM);
767 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
768 }
769};
770
771enum ComponentKind : unsigned {
772 SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD.
773 COMPONENT_X, // A VOPD instruction, X component.
774 COMPONENT_Y, // A VOPD instruction, Y component.
776};
777
778// Interface functions of this class map VOPD component operand indices
779// to indices of operands in MachineInstr/MCInst or parsed operands array.
780//
781// Note that this class operates with 3 kinds of indices:
782// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
783// - MC operand indices (they refer to operands in a MachineInstr/MCInst);
784// - parsed operand indices (they refer to operands in parsed operands array).
785//
786// For SINGLE components mapping between these indices is trivial.
787// But things get more complicated for COMPONENT_X and
788// COMPONENT_Y because these components share the same
789// MachineInstr/MCInst and the same parsed operands array.
790// Below is an example of component operand to parsed operand
791// mapping for the following instruction:
792//
793// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
794//
795// PARSED COMPONENT PARSED
796// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
797// -------------------------------------------------------------------
798// "v_dual_add_f32" 0
799// v_dual_add_f32 v255 0 (DST) --> 1
800// v4 1 (SRC0) --> 2
801// v5 2 (SRC1) --> 3
802// "::" 4
803// "v_dual_mov_b32" 5
804// v_dual_mov_b32 v6 0 (DST) --> 6
805// v1 1 (SRC0) --> 7
806// -------------------------------------------------------------------
807//
809private:
810 // Regular MachineInstr/MCInst operands are ordered as follows:
811 // dst, src0 [, other src operands]
812 // VOPD MachineInstr/MCInst operands are ordered as follows:
813 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
814 // Each ComponentKind has operand indices defined below.
815 static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
816
// VOPD3 instructions may have 2 or 3 source modifiers; the src2 modifier is
// not used if there is a tied accumulator. The table is indexed as
// SINGLE_MC_SRC_IDX[VOPD3ModsNum][SrcNo] and yields an index for a SINGLE
// instruction layout; add 1 for COMPONENT_X or COMPONENT_Y. For the second
// component also add OpX.MCSrcNum + OpX.VOPD3ModsNum.
// For VOPD1/VOPD2 use the row with zero modifiers.
static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
    {1, 2, 3}, // VOPD3ModsNum == 0
    {2, 3, 4}, // VOPD3ModsNum == 1
    {2, 4, 5}, // VOPD3ModsNum == 2
    {2, 4, 6}, // VOPD3ModsNum == 3
};
825
826 // Parsed operands of regular instructions are ordered as follows:
827 // Mnemo dst src0 [vsrc1 ...]
828 // Parsed VOPD operands are ordered as follows:
829 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
830 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
831 // Each ComponentKind has operand indices defined below.
832 static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
833 4 /* + OpX.ParsedSrcNum */};
834 static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
835 2, 2, 5 /* + OpX.ParsedSrcNum */};
836
837private:
838 const ComponentKind Kind;
839 const ComponentProps PrevComp;
840 const unsigned VOPD3ModsNum;
841 const int BitOp3Idx; // Index of bitop3 operand or -1
842
843public:
844 // Create layout for COMPONENT_X or SINGLE component.
845 ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
846 : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
848 }
849
850 // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
851 ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
852 int BitOp3Idx)
853 : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
854 VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}
855
856public:
857 // Return the index of dst operand in MCInst operands.
858 unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
859
860 // Return the index of the specified src operand in MCInst operands.
861 unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
862 assert(CompSrcIdx < Component::MAX_SRC_NUM);
863
864 if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
865 return BitOp3Idx;
866
867 if (VOPD3) {
868 return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
869 getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
870 }
871
872 return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
873 (Kind != SINGLE ? 1 : 0);
874 }
875
876 // Return the index of dst operand in the parsed operands array.
878 return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
879 }
880
881 // Return the index of the specified src operand in the parsed operands array.
882 unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
883 assert(CompSrcIdx < Component::MAX_SRC_NUM);
884 return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
885 }
886
887private:
888 unsigned getPrevCompSrcNum() const {
889 return PrevComp.getCompSrcOperandsNum();
890 }
891 unsigned getPrevCompParsedSrcNum() const {
892 return PrevComp.getCompParsedSrcOperandsNum();
893 }
894 unsigned getPrevCompVOPD3ModsNum() const {
895 return PrevComp.getCompVOPD3ModsNum();
896 }
897};
898
899// Layout and properties of VOPD components.
901public:
902 // Create ComponentInfo for COMPONENT_X or SINGLE component.
905 bool VOP3Layout = false)
906 : ComponentProps(OpDesc, VOP3Layout),
908
909 // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
910 ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
911 bool VOP3Layout = false)
912 : ComponentProps(OpDesc, VOP3Layout),
915
916 // Map component operand index to parsed operand index.
917 // Return 0 if the specified operand does not exist.
918 unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
919};
920
921// Properties of VOPD instructions.
922class InstInfo {
923private:
924 const ComponentInfo CompInfo[COMPONENTS_NUM];
925
926public:
927 using RegIndices = std::array<MCRegister, Component::MAX_OPR_NUM>;
928
929 InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
930 : CompInfo{OpX, OpY} {}
931
932 InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
933 : CompInfo{OprInfoX, OprInfoY} {}
934
935 const ComponentInfo &operator[](size_t ComponentIdx) const {
936 assert(ComponentIdx < COMPONENTS_NUM);
937 return CompInfo[ComponentIdx];
938 }
939
940 // Check VOPD operands constraints.
941 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
942 // for the specified component and MC operand. The callback must return 0
943 // if the operand is not a register or not a VGPR.
944 // If \p SkipSrc is set to true then constraints for source operands are not
945 // checked.
946 // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
947 // even though it violates requirement to be from different banks.
948 // If \p VOPD3 is set to true both dst registers allowed to be either odd
949 // or even and instruction may have real src2 as opposed to tied accumulator.
950 bool
951 hasInvalidOperand(std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
952 const MCRegisterInfo &MRI, bool SkipSrc = false,
953 bool AllowSameVGPR = false, bool VOPD3 = false) const {
954 return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
955 VOPD3)
956 .has_value();
957 }
958
959 // Check VOPD operands constraints.
960 // Return the index of an invalid component operand, if any.
961 // If \p SkipSrc is set to true then constraints for source operands are not
962 // checked except for being from the same halves of VGPR file on gfx1250.
963 // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
964 // even though it violates requirement to be from different banks.
965 // If \p VOPD3 is set to true both dst registers allowed to be either odd
966 // or even and instruction may have real src2 as opposed to tied accumulator.
967 std::optional<unsigned> getInvalidCompOperandIndex(
968 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
969 const MCRegisterInfo &MRI, bool SkipSrc = false,
970 bool AllowSameVGPR = false, bool VOPD3 = false) const;
971
972private:
974 getRegIndices(unsigned ComponentIdx,
975 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
976 bool VOPD3) const;
977};
978
979} // namespace VOPD
980
982std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
983
985// Get properties of 2 single VOP1/VOP2 instructions
986// used as components to create a VOPD instruction.
987VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
988
990// Get properties of VOPD X and Y components.
991VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
992 const MCInstrInfo *InstrInfo);
993
995bool isAsyncStore(unsigned Opc);
997bool isTensorStore(unsigned Opc);
999unsigned getTemporalHintType(const MCInstrDesc TID);
1000
1002bool isTrue16Inst(unsigned Opc);
1003
1005FPType getFPDstSelType(unsigned Opc);
1006
1009
1012
1013bool isDPMACCInstruction(unsigned Opc);
1014
1016unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
1017
1019unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
1020
1022 const MCSubtargetInfo *STI);
1023
1024bool isGroupSegment(const GlobalValue *GV);
1025bool isGlobalSegment(const GlobalValue *GV);
1026bool isReadOnlySegment(const GlobalValue *GV);
1027
1028/// \returns True if constants should be emitted to .text section for given
1029/// target triple \p TT, false otherwise.
1031
1032/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1033/// register name. Followed by the start register number, and the register
1034/// width. Does not validate the number of registers exists in the class. Unlike
1035/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
1036/// "{}".
1037std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);
1038
1039/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1040/// register constraint. Followed by the start register number, and the register
1041/// width. Does not validate the number of registers exists in the class.
1042std::tuple<char, unsigned, unsigned>
1044
1045/// \returns Integer value requested using \p F's \p Name attribute.
1046///
1047/// \returns \p Default if attribute is not present.
1048///
1049/// \returns \p Default and emits error if requested value cannot be converted
1050/// to integer.
1052
1053/// \returns A pair of integer values requested using \p F's \p Name attribute
1054/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1055/// is false).
1056///
1057/// \returns \p Default if attribute is not present.
1058///
1059/// \returns \p Default and emits error if one of the requested values cannot be
1060/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
1061/// not present.
1062std::pair<unsigned, unsigned>
1064 std::pair<unsigned, unsigned> Default,
1065 bool OnlyFirstRequired = false);
1066
1067/// \returns A pair of integer values requested using \p F's \p Name attribute
1068/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1069/// is false).
1070///
1071/// \returns \p std::nullopt if attribute is not present.
1072///
1073/// \returns \p std::nullopt and emits error if one of the requested values
1074/// cannot be converted to integer, or \p OnlyFirstRequired is false and
1075/// "second" value is not present.
1076std::optional<std::pair<unsigned, std::optional<unsigned>>>
1078 bool OnlyFirstRequired = false);
1079
1080/// \returns A vector of integer values requested using \p F's \p Name
1081/// attribute.
1082/// \returns A vector of size \p Size, with all elements set to \p DefaultVal,
1083/// if any error occurs. The corresponding error will also be emitted.
1085 unsigned Size,
1086 unsigned DefaultVal);
1087/// Similar to the function above, but returns std::nullopt if any error occurs.
1088std::optional<SmallVector<unsigned>>
1089getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
1090
1091/// Checks if \p Val is inside \p MD, a !range-like metadata.
1092bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
1093
1095 LOAD_CNT = 0, // VMcnt prior to gfx12.
1096 DS_CNT, // LGKMcnt prior to gfx12.
1098 STORE_CNT, // VScnt in gfx10/gfx11.
1101 BVH_CNT, // gfx12+ only.
1102 KM_CNT, // gfx12+ only.
1103 X_CNT, // gfx1250.
1104 ASYNC_CNT, // gfx1250.
1106 VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
1107 VM_VSRC, // gfx12+ expert mode only.
1110};
1111
1113
1114// Return an iterator over all counters between LOAD_CNT (the first counter)
1115// and \c MaxCounter (exclusive, default value yields an enumeration over
1116// all counters).
1119
1120} // namespace AMDGPU
1121
// Specialization of enum_iteration_traits for AMDGPU::InstCounterType that
// sets is_iterable to true. NOTE(review): presumably this is what permits
// iterating over the counter enum; the primary template is not visible here.
1122template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
1123 static constexpr bool is_iterable = true;
1124};
1125
1126namespace AMDGPU {
1127
1128/// Represents the counter values to wait for in an s_waitcnt instruction.
1129///
1130/// Large values (including the maximum possible integer) can be used to
1131/// represent "don't care" waits.
1132class Waitcnt {
1133 std::array<unsigned, NUM_INST_CNTS> Cnt;
1134
1135public:
1136 unsigned get(InstCounterType T) const { return Cnt[T]; }
1137 void set(InstCounterType T, unsigned Val) { Cnt[T] = Val; }
1138
1139 Waitcnt() { fill(Cnt, ~0u); }
1140 // Pre-gfx12 constructor.
1141 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
1142 : Waitcnt() {
1143 Cnt[LOAD_CNT] = VmCnt;
1144 Cnt[EXP_CNT] = ExpCnt;
1145 Cnt[DS_CNT] = LgkmCnt;
1146 Cnt[STORE_CNT] = VsCnt;
1147 }
1148
1149 // gfx12+ constructor.
1150 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
1151 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
1152 unsigned AsyncCnt, unsigned VaVdst, unsigned VmVsrc)
1153 : Waitcnt() {
1154 Cnt[LOAD_CNT] = LoadCnt;
1155 Cnt[DS_CNT] = DsCnt;
1156 Cnt[EXP_CNT] = ExpCnt;
1157 Cnt[STORE_CNT] = StoreCnt;
1158 Cnt[SAMPLE_CNT] = SampleCnt;
1159 Cnt[BVH_CNT] = BvhCnt;
1160 Cnt[KM_CNT] = KmCnt;
1161 Cnt[X_CNT] = XCnt;
1162 Cnt[ASYNC_CNT] = AsyncCnt;
1163 Cnt[VA_VDST] = VaVdst;
1164 Cnt[VM_VSRC] = VmVsrc;
1165 }
1166
1167 bool hasWait() const {
1168 return any_of(Cnt, [](unsigned Val) { return Val != ~0u; });
1169 }
1170
1173 if (T == STORE_CNT)
1174 continue;
1175 if (Cnt[T] != ~0u)
1176 return true;
1177 }
1178 return false;
1179 }
1180
1181 bool hasWaitStoreCnt() const { return Cnt[STORE_CNT] != ~0u; }
1182
1183 bool hasWaitDepctr() const {
1184 return Cnt[VA_VDST] != ~0u || Cnt[VM_VSRC] != ~0u;
1185 }
1186
1188 // Does the right thing provided self and Other are either both pre-gfx12
1189 // or both gfx12+.
1190 Waitcnt Wait;
1192 Wait.Cnt[T] = std::min(Cnt[T], Other.Cnt[T]);
1193 return Wait;
1194 }
1195
1196 void print(raw_ostream &OS) const {
1197 ListSeparator LS;
1199 OS << LS << getInstCounterName(T) << ": " << Cnt[T];
1200 if (LS.unused())
1201 OS << "none";
1202 OS << '\n';
1203 }
1204
1205#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1206 LLVM_DUMP_METHOD void dump() const;
1207#endif
1208
1210 Wait.print(OS);
1211 return OS;
1212 }
1213};
1214
1215/// Represents the hardware counter limits for different wait count types.
1217 unsigned LoadcntMax; // Corresponds to Vmcnt prior to gfx12.
1218 unsigned ExpcntMax;
1219 unsigned DscntMax; // Corresponds to LGKMcnt prior to gfx12.
1220 unsigned StorecntMax; // Corresponds to VScnt in gfx10/gfx11.
1221 unsigned SamplecntMax; // gfx12+ only.
1222 unsigned BvhcntMax; // gfx12+ only.
1223 unsigned KmcntMax; // gfx12+ only.
1224 unsigned XcntMax; // gfx1250.
1225 unsigned AsyncMax; // gfx1250.
1226 unsigned VaVdstMax; // gfx12+ expert mode only.
1227 unsigned VmVsrcMax; // gfx12+ expert mode only.
1228
1229 HardwareLimits() = default;
1230
1231 /// Initializes hardware limits from ISA version.
1233};
1234
1235// The following methods are only meaningful on targets that support
1236// S_WAITCNT.
1237
1238/// \returns Vmcnt bit mask for given isa \p Version.
1239unsigned getVmcntBitMask(const IsaVersion &Version);
1240
1241/// \returns Expcnt bit mask for given isa \p Version.
1242unsigned getExpcntBitMask(const IsaVersion &Version);
1243
1244/// \returns Lgkmcnt bit mask for given isa \p Version.
1245unsigned getLgkmcntBitMask(const IsaVersion &Version);
1246
1247/// \returns Waitcnt bit mask for given isa \p Version.
1248unsigned getWaitcntBitMask(const IsaVersion &Version);
1249
1250/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
1251unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
1252
1253/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
1254unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
1255
1256/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
1257unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
1258
1259/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
1260/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
1261/// \p Lgkmcnt respectively. Should not be used on gfx12+; the instruction
1262/// which needs it is deprecated.
1263///
1264/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
1265/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
1266/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
1267/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
1268/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
1269/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
1270/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
1271/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
1272/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
1273///
1274void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1275 unsigned &Expcnt, unsigned &Lgkmcnt);
1276
1277Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1278
1279/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1280unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1281 unsigned Vmcnt);
1282
1283/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1284unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1285 unsigned Expcnt);
1286
1287/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1288unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1289 unsigned Lgkmcnt);
1290
1291/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
1292/// \p Version. Should not be used on gfx12+; the instruction which needs
1293/// it is deprecated.
1294///
1295/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1296/// Waitcnt[2:0] = \p Expcnt (gfx11+)
1297/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
1298/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
1299/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
1300/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
1301/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
1302/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
1303/// Waitcnt[15:10] = \p Vmcnt (gfx11)
1304/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
1305///
1306/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1307/// isa \p Version.
1308///
1309unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1310 unsigned Expcnt, unsigned Lgkmcnt);
1311
1312unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1313
1314// The following methods are only meaningful on targets that support
1315// S_WAIT_*CNT, introduced with gfx12.
1316
1317/// \returns Loadcnt bit mask for given isa \p Version.
1318/// Returns 0 for versions that do not support LOADcnt
1319unsigned getLoadcntBitMask(const IsaVersion &Version);
1320
1321/// \returns Samplecnt bit mask for given isa \p Version.
1322/// Returns 0 for versions that do not support SAMPLEcnt
1323unsigned getSamplecntBitMask(const IsaVersion &Version);
1324
1325/// \returns Bvhcnt bit mask for given isa \p Version.
1326/// Returns 0 for versions that do not support BVHcnt
1327unsigned getBvhcntBitMask(const IsaVersion &Version);
1328
1329/// \returns Asynccnt bit mask for given isa \p Version.
1330/// Returns 0 for versions that do not support Asynccnt
1331unsigned getAsynccntBitMask(const IsaVersion &Version);
1332
1333/// \returns Dscnt bit mask for given isa \p Version.
1334/// Returns 0 for versions that do not support DScnt
1335unsigned getDscntBitMask(const IsaVersion &Version);
1336
1337/// \returns Kmcnt bit mask for given isa \p Version.
1338/// Returns 0 for versions that do not support KMcnt
1339unsigned getKmcntBitMask(const IsaVersion &Version);
1340
1341/// \returns Xcnt bit mask for given isa \p Version.
1342/// Returns 0 for versions that do not support Xcnt.
1343unsigned getXcntBitMask(const IsaVersion &Version);
1344
1345/// \return STOREcnt or VScnt bit mask for given isa \p Version.
1346/// returns 0 for versions that do not support STOREcnt or VScnt.
1347/// STOREcnt and VScnt are the same counter, the name used
1348/// depends on the ISA version.
1349unsigned getStorecntBitMask(const IsaVersion &Version);
1350
1351// The following are only meaningful on targets that support
1352// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1353
1354/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1355/// isa \p Version.
1356Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1357
1358/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1359/// isa \p Version.
1360Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1361
1362/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
1363/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1364/// \p Version.
1365unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1366
1367/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
1368/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1369/// \p Version.
1370unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1371
1372namespace Hwreg {
1373
1376
1377struct HwregSize : EncodingField<15, 11, 32> {
1379 constexpr uint64_t encode() const { return Value - 1; }
1380 static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
1381};
1382
1384
1385} // namespace Hwreg
1386
1387namespace DepCtr {
1388
1390int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1391 const MCSubtargetInfo &STI);
1392bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1393 const MCSubtargetInfo &STI);
1394bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1395 bool &IsDefault, const MCSubtargetInfo &STI);
1396
1397/// \returns Maximum VaVdst value that can be encoded.
1398unsigned getVaVdstBitMask();
1399
1400/// \returns Maximum VaSdst value that can be encoded.
1401unsigned getVaSdstBitMask();
1402
1403/// \returns Maximum VaSsrc value that can be encoded.
1404unsigned getVaSsrcBitMask();
1405
1406/// \returns Maximum HoldCnt value that can be encoded.
1407unsigned getHoldCntBitMask(const IsaVersion &Version);
1408
1409/// \returns Maximum VmVsrc value that can be encoded.
1410unsigned getVmVsrcBitMask();
1411
1412/// \returns Maximum VaVcc value that can be encoded.
1413unsigned getVaVccBitMask();
1414
1415/// \returns Maximum SaSdst value that can be encoded.
1416unsigned getSaSdstBitMask();
1417
1418/// \returns Decoded VaVdst from given immediate \p Encoded.
1419unsigned decodeFieldVaVdst(unsigned Encoded);
1420
1421/// \returns Decoded VmVsrc from given immediate \p Encoded.
1422unsigned decodeFieldVmVsrc(unsigned Encoded);
1423
1424/// \returns Decoded SaSdst from given immediate \p Encoded.
1425unsigned decodeFieldSaSdst(unsigned Encoded);
1426
1427/// \returns Decoded VaSdst from given immediate \p Encoded.
1428unsigned decodeFieldVaSdst(unsigned Encoded);
1429
1430/// \returns Decoded VaVcc from given immediate \p Encoded.
1431unsigned decodeFieldVaVcc(unsigned Encoded);
1432
1433/// \returns Decoded VaSsrc from given immediate \p Encoded.
1434unsigned decodeFieldVaSsrc(unsigned Encoded);
1435
1436/// \returns Decoded HoldCnt from given immediate \p Encoded.
1437unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version);
1438
1439/// \returns \p VmVsrc as an encoded Depctr immediate.
1440unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI);
1441
1442/// \returns \p Encoded combined with encoded \p VmVsrc.
1443unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1444
1445/// \returns \p VaVdst as an encoded Depctr immediate.
1446unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI);
1447
1448/// \returns \p Encoded combined with encoded \p VaVdst.
1449unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1450
1451/// \returns \p SaSdst as an encoded Depctr immediate.
1452unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI);
1453
1454/// \returns \p Encoded combined with encoded \p SaSdst.
1455unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1456
1457/// \returns \p VaSdst as an encoded Depctr immediate.
1458unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI);
1459
1460/// \returns \p Encoded combined with encoded \p VaSdst.
1461unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);
1462
1463/// \returns \p VaVcc as an encoded Depctr immediate.
1464unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI);
1465
1466/// \returns \p Encoded combined with encoded \p VaVcc.
1467unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
1468
1469/// \returns \p HoldCnt as an encoded Depctr immediate.
1470unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI);
1471
1472/// \returns \p Encoded combined with encoded \p HoldCnt.
1473unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
1474 const IsaVersion &Version);
1475
1476/// \returns \p VaSsrc as an encoded Depctr immediate.
1477unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI);
1478
1479/// \returns \p Encoded combined with encoded \p VaSsrc.
1480unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
1481
1482} // namespace DepCtr
1483
1484namespace Exp {
1485
1486bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1487
1489unsigned getTgtId(const StringRef Name);
1490
1492bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1493
1494} // namespace Exp
1495
1496namespace MTBUFFormat {
1497
1499int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1500
1501void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1502
1503int64_t getDfmt(const StringRef Name);
1504
1505StringRef getDfmtName(unsigned Id);
1506
1507int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1508
1509StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1510
1511bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1512
1513bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1514
1515int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1516
1517StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1518
1519bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1520
1521int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1522 const MCSubtargetInfo &STI);
1523
1524bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1525
1526unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1527
1528} // namespace MTBUFFormat
1529
1530namespace SendMsg {
1531
1533bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1534
1536bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1537 bool Strict = true);
1538
1540bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1541 const MCSubtargetInfo &STI, bool Strict = true);
1542
1544bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1545
1547bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1548
1549void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1550 uint16_t &StreamId, const MCSubtargetInfo &STI);
1551
1553uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
1554
1555/// Returns true if the message does not use the m0 operand.
1556bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI);
1557
1558} // namespace SendMsg
1559
1560unsigned getInitialPSInputAddr(const Function &F);
1561
1562bool getHasColorExport(const Function &F);
1563
1564bool getHasDepthExport(const Function &F);
1565
1567
1568// Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
1569// the attribute is missing or its value is invalid.
1570unsigned getDynamicVGPRBlockSize(const Function &F);
1571
1573constexpr bool isShader(CallingConv::ID CC) {
1574 switch (CC) {
1584 return true;
1585 default:
1586 return false;
1587 }
1588}
1589
1591constexpr bool isGraphics(CallingConv::ID CC) {
1592 return isShader(CC) || CC == CallingConv::AMDGPU_Gfx ||
1594}
1595
1597constexpr bool isCompute(CallingConv::ID CC) {
1598 return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS;
1599}
1600
1603 switch (CC) {
1613 return true;
1614 default:
1615 return false;
1616 }
1617}
1618
1620constexpr bool isChainCC(CallingConv::ID CC) {
1621 switch (CC) {
1624 return true;
1625 default:
1626 return false;
1627 }
1628}
1629
1630// These functions are considered entrypoints into the current module, i.e. they
1631// are allowed to be called from outside the current module. This is different
1632// from isEntryFunctionCC, which is only true for functions that are entered by
1633// the hardware. Module entry points include all entry functions but also
1634// include functions that can be called from other functions inside or outside
1635// the current module. Module entry functions are allowed to allocate LDS.
1636//
1637// AMDGPU_CS_Chain is intended for externally callable chain functions, so it is
1638// treated as a module entrypoint. AMDGPU_CS_ChainPreserve is used for internal
1639// helper functions (e.g. retry helpers), so it is not a module entrypoint.
1642 switch (CC) {
1645 return true;
1646 default:
1647 return isEntryFunctionCC(CC);
1648 }
1649}
1650
1652constexpr inline bool isKernel(CallingConv::ID CC) {
1653 switch (CC) {
1656 return true;
1657 default:
1658 return false;
1659 }
1660}
1661
1662inline bool isKernel(const Function &F) { return isKernel(F.getCallingConv()); }
1663
1666 return CC == CallingConv::Fast;
1667}
1668
1669/// Return true if we might ever do TCO for calls with this calling convention.
1672 switch (CC) {
1673 case CallingConv::C:
1676 return true;
1677 default:
1678 return canGuaranteeTCO(CC);
1679 }
1680}
1681
1682bool hasXNACK(const MCSubtargetInfo &STI);
1683bool hasSRAMECC(const MCSubtargetInfo &STI);
1684bool hasMIMG_R128(const MCSubtargetInfo &STI);
1685bool hasA16(const MCSubtargetInfo &STI);
1686bool hasG16(const MCSubtargetInfo &STI);
1687bool hasPackedD16(const MCSubtargetInfo &STI);
1688bool hasGDS(const MCSubtargetInfo &STI);
1689unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1690unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1691
1692bool isSI(const MCSubtargetInfo &STI);
1693bool isCI(const MCSubtargetInfo &STI);
1694bool isVI(const MCSubtargetInfo &STI);
1695bool isGFX9(const MCSubtargetInfo &STI);
1696bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1697bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1698bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1699bool isGFX8Plus(const MCSubtargetInfo &STI);
1700bool isGFX9Plus(const MCSubtargetInfo &STI);
1701bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1702bool isGFX10(const MCSubtargetInfo &STI);
1703bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1704bool isGFX10Plus(const MCSubtargetInfo &STI);
1705bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1706bool isGFX10Before1030(const MCSubtargetInfo &STI);
1707bool isGFX11(const MCSubtargetInfo &STI);
1708bool isGFX11Plus(const MCSubtargetInfo &STI);
1709bool isGFX12(const MCSubtargetInfo &STI);
1710bool isGFX12Plus(const MCSubtargetInfo &STI);
1711bool isGFX1250(const MCSubtargetInfo &STI);
1712bool isGFX1250Plus(const MCSubtargetInfo &STI);
1713bool isGFX13(const MCSubtargetInfo &STI);
1714bool isGFX13Plus(const MCSubtargetInfo &STI);
1715bool supportsWGP(const MCSubtargetInfo &STI);
1716bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1717bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1718bool isGCN3Encoding(const MCSubtargetInfo &STI);
1719bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1720bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1721bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1722bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1723bool isGFX90A(const MCSubtargetInfo &STI);
1724bool isGFX940(const MCSubtargetInfo &STI);
1726bool hasMAIInsts(const MCSubtargetInfo &STI);
1727bool hasVOPD(const MCSubtargetInfo &STI);
1728bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1729
1730inline bool supportsWave32(const MCSubtargetInfo &STI) {
1731 return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
1732}
1733
1734int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1735unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1737
1738/// Is \p Reg a scalar register?
1739bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);
1740
1741/// \returns true if \p Reg occupies the high 16-bits of a 32-bit register.
1742bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);
1743
1744/// If \p Reg is a pseudo reg, return the correct hardware register given
1745/// \p STI otherwise return \p Reg.
1747
1748/// Convert hardware register \p Reg to a pseudo register
1751
1754
1755/// Is this an AMDGPU specific source operand? These include registers,
1756/// inline constants, literals and mandatory literals (KImm).
1757constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo) {
1758 return OpInfo.OperandType >= AMDGPU::OPERAND_SRC_FIRST &&
1759 OpInfo.OperandType <= AMDGPU::OPERAND_SRC_LAST;
1760}
1761
1762inline bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1763 return isSISrcOperand(Desc.operands()[OpNo]);
1764}
1765
1766/// Is this a KImm operand?
1767bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1768
1769/// Is this a floating-point operand?
1770bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1771
1772/// Does this operand support only inlinable literals?
1773bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1774
1775/// Get the size in bits of a register from the register class ID \p RCID.
1776unsigned getRegBitWidth(unsigned RCID);
1777
1778/// Get the size in bits of a register from the register class \p RC.
1779unsigned getRegBitWidth(const MCRegisterClass &RC);
1780
1782inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1783 switch (OpInfo.OperandType) {
1793 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1795 return 4;
1796
1803 return 8;
1804
1819 return 2;
1820
1821 default:
1822 llvm_unreachable("unhandled operand type");
1823 }
1824}
1825
1827inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1828 return getOperandSize(Desc.operands()[OpNo]);
1829}
1830
/// \returns true if \p Literal is an inlinable integer literal, i.e. it lies
/// in the closed range [-16, 64]. Values intended for floating point are not
/// covered by this check.
inline bool isInlinableIntLiteral(int64_t Literal) {
  constexpr int64_t MinInlinable = -16;
  constexpr int64_t MaxInlinable = 64;
  return MinInlinable <= Literal && Literal <= MaxInlinable;
}
1837
1838/// Is this literal inlinable
1840bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1841
1843bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1844
1846bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1847
1849bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1850
1852bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1853
1855std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1856
1858std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
1859
1861std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1862
1864std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
1865 bool IsGFX11Plus);
1866
1869
1872
1875
1878
1880bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus);
1881
1883bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1884
1886int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit);
1887
1888bool isArgPassedInSGPR(const Argument *Arg);
1889
1890bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1891
1892LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);
1893
1896 int64_t EncodedOffset);
1897
1900 int64_t EncodedOffset, bool IsBuffer);
1901
1902/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1903/// offsets.
1905
1906/// \returns The encoding that will be used for \p ByteOffset in the
1907/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
1908/// S_LOAD instructions have a signed offset, on other subtargets it is
1909/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
1910std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1911 int64_t ByteOffset, bool IsBuffer,
1912 bool HasSOffset = false);
1913
1914/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
1915/// instruction. This is only useful on CI.
1916std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1917 int64_t ByteOffset);
1918
1919/// For pre-GFX12 FLAT instructions the offset must be positive;
1920/// MSB is ignored and forced to zero.
1921///
1922/// \return The number of bits available for the signed offset field in flat
1923/// instructions. Note that some forms of the instruction disallow negative
1924/// offsets.
1925unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1926
1927/// \returns true if this offset is small enough to fit in the SMRD
1928/// offset field. \p ByteOffset should be the offset in bytes and
1929/// not the encoded offset.
1930bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1931
1933inline bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC) {
1934 if (isGFX12(ST))
1935 return DC >= DPP::ROW_SHARE_FIRST && DC <= DPP::ROW_SHARE_LAST;
1936 if (isGFX90A(ST))
1937 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1938 return false;
1939}
1940
1941/// \returns true if an instruction may have a 64-bit VGPR operand.
1943 const MCSubtargetInfo &ST);
1944
1945/// \returns true if an instruction is a DP ALU DPP without any 64-bit operands.
1946bool isDPALU_DPP32BitOpc(unsigned Opc);
1947
1948/// \returns true if an instruction is a DP ALU DPP.
1949bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
1950 const MCSubtargetInfo &ST);
1951
1952/// \returns true if the intrinsic is divergent
1953bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1954
1955/// \returns true if the intrinsic is uniform
1956bool isIntrinsicAlwaysUniform(unsigned IntrID);
1957
1958/// \returns a register class for the physical register \p Reg if it is a VGPR
1959/// or nullptr otherwise.
1961 const MCRegisterInfo &MRI);
1962
1963/// \returns the MODE bits which have to be set by the S_SET_VGPR_MSB for the
1964/// physical register \p Reg.
1965unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI);
1966
1967/// If \p Reg is a low VGPR return a corresponding high VGPR with \p MSBs set.
1969 const MCRegisterInfo &MRI);
1970
1971/// \returns VGPR MSBs encoded in a S_SETREG_IMM32_B32 \p MI if it sets
1972/// it. If \p HasSetregVGPRMSBFixup is true then size of the ID_MODE mask is
1973/// ignored.
1974std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
1975 bool HasSetregVGPRMSBFixup);
1976
1977/// \returns VGPR MSBs encoded in a S_SETREG_IMM32_B32 \p MI if it sets
1978/// it. If \p HasSetregVGPRMSBFixup is true then size of the ID_MODE mask is
1979/// ignored.
1980std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
1981 bool HasSetregVGPRMSBFixup);
1982
1983// Returns a table for the opcode with a given \p Desc to map the VGPR MSB
1984// set by the S_SET_VGPR_MSB to one of 4 sources. In case of VOPD returns 2
1985// maps, one for X and one for Y component.
1986std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
1988
1989/// \returns true if a memory instruction supports scale_offset modifier.
1990bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
1991
1992/// \returns lds block size in terms of dwords.
1993/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
1994/// must be defined in terms of bytes.
1995unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1996
1998public:
2000
2001 ClusterDimsAttr() = default;
2002
2003 Kind getKind() const { return AttrKind; }
2004
2005 bool isUnknown() const { return getKind() == Kind::Unknown; }
2006
2007 bool isNoCluster() const { return getKind() == Kind::NoCluster; }
2008
2009 bool isFixedDims() const { return getKind() == Kind::FixedDims; }
2010
2011 bool isVariableDims() const { return getKind() == Kind::VariableDims; }
2012
2014
2016
2018
2019 /// \returns the dims stored. Note that this function can only be called if
2020 /// the kind is Kind::FixedDims.
2021 const std::array<unsigned, 3> &getDims() const;
2022
2023 bool operator==(const ClusterDimsAttr &RHS) const {
2024 return AttrKind == RHS.AttrKind && Dims == RHS.Dims;
2025 }
2026
2027 std::string to_string() const;
2028
2029 static ClusterDimsAttr get(const Function &F);
2030
2031private:
2032 enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 };
2033
2034 ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {}
2035
2036 std::array<unsigned, 3> Dims = {0, 0, 0};
2037
2038 Kind AttrKind = Kind::Unknown;
2039};
2040
2041} // namespace AMDGPU
2042
2045
2046} // end namespace llvm
2047
2048#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Base class for AMDGPU specific classes of TargetSubtarget.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_READONLY
Definition Compiler.h:322
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define G(x, y, z)
Definition MD5.cpp:55
Register Reg
Register const TargetRegisterInfo * TRI
#define T
This file contains some functions that are useful when dealing with strings.
Value * RHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
static ClusterDimsAttr get(const Function &F)
bool operator==(const ClusterDimsAttr &RHS) const
const std::array< unsigned, 3 > & getDims() const
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
TargetIDSetting getXnackSetting() const
void print(raw_ostream &OS) const
Write string representation to OS.
AMDGPUTargetID(const MCSubtargetInfo &STI)
void setTargetIDFromTargetIDStream(StringRef TargetID)
void setSramEccSetting(TargetIDSetting NewSramEccSetting)
Sets sramecc setting to NewSramEccSetting.
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
ComponentInfo(const MCInstrDesc &OpDesc, ComponentKind Kind=ComponentKind::SINGLE, bool VOP3Layout=false)
ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps, bool VOP3Layout=false)
unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const
ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum, int BitOp3Idx)
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
bool hasRegSrcOperand(unsigned CompSrcIdx) const
unsigned getMandatoryLiteralCompOperandIndex() const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
bool hasInvalidOperand(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
const ComponentInfo & operator[](size_t ComponentIdx) const
InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
std::array< MCRegister, Component::MAX_OPR_NUM > RegIndices
Represents the counter values to wait for in an s_waitcnt instruction.
void print(raw_ostream &OS) const
LLVM_DUMP_METHOD void dump() const
bool hasWaitExceptStoreCnt() const
Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt, unsigned AsyncCnt, unsigned VaVdst, unsigned VmVsrc)
Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
Waitcnt combined(const Waitcnt &Other) const
unsigned get(InstCounterType T) const
friend raw_ostream & operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait)
void set(InstCounterType T, unsigned Val)
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A helper class to return the specified delimiter string after the first invocation of operator String...
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
MCRegisterClass - Base class of TargetRegisterClass.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Generic base class for all target subtargets.
Metadata node.
Definition Metadata.h:1080
Representation of each machine instruction.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned decodeFieldVaVcc(unsigned Encoded)
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version)
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt, const IsaVersion &Version)
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned getHoldCntBitMask(const IsaVersion &Version)
unsigned decodeFieldVaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
unsigned decodeFieldVaSsrc(unsigned Encoded)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
Generic target versions emitted by this version of LLVM.
static constexpr unsigned GFX12_5
static constexpr unsigned GFX9_4
static constexpr unsigned GFX10_1
static constexpr unsigned GFX10_3
static constexpr unsigned GFX11
static constexpr unsigned GFX9
static constexpr unsigned GFX12
EncodingField< 10, 6 > HwregOffset
EncodingField< 5, 0 > HwregId
EncodingFields< HwregId, HwregOffset, HwregSize > HwregEncoding
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getArchVGPRAllocGranule()
For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage, returns the allocation granule...
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
constexpr unsigned getMaxFlatWorkGroupSize()
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
raw_ostream & operator<<(raw_ostream &OS, const AMDGPUTargetID &TargetID)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI)
Returns true if the message does not use the m0 operand.
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
constexpr unsigned VOPD3_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
bool isInlineValue(MCRegister Reg)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool getWMMAIsXDL(unsigned Opc)
static std::optional< unsigned > convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16, bool HasSetregVGPRMSBFixup)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
int getMTBUFElements(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
iota_range< InstCounterType > inst_counter_types(InstCounterType MaxCounter)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
FPType getFPDstSelType(unsigned Opc)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
const MCRegisterClass * getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI)
LLVM_READNONE constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC)
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGFX13(const MCSubtargetInfo &STI)
unsigned getAsynccntBitMask(const IsaVersion &Version)
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val)
Checks if Val is inside MD, a !range-like metadata.
LLVM_READONLY bool isInvalidSingleUseProducerInst(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_READONLY bool isInvalidSingleUseConsumerInst(unsigned Opc)
unsigned getVOPDOpcode(unsigned Opc, bool VOPD3)
bool isGroupSegment(const GlobalValue *GV)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
bool isArgPassedInSGPR(const Argument *A)
LLVM_READNONE constexpr bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool isDPALU_DPP32BitOpc(unsigned Opc)
bool getVOP1IsSingle(unsigned Opc)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a tuple whose first entry is a valid charcode if this is a valid physical register name, or 0 otherwise.
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getHasDepthExport(const Function &F)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
bool getMUBUFHasVAddr(unsigned Opc)
bool isTrue16Inst(unsigned Opc)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isAsyncStore(unsigned Opc)
unsigned getDynamicVGPRBlockSize(const Function &F)
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
unsigned getKmcntBitMask(const IsaVersion &Version)
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, const MCRegisterInfo &MRI)
If Reg is a low VGPR return a corresponding high VGPR with MSBs set.
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
unsigned getBitOp2(unsigned Opc)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
unsigned getXcntBitMask(const IsaVersion &Version)
bool isGenericAtomic(unsigned Opc)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFTfe(unsigned Opc)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isGFX13Plus(const MCSubtargetInfo &STI)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a tuple whose first entry is a valid charcode if this is a valid physical register constraint, or 0 otherwise.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX10Plus(const MCSubtargetInfo &STI)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isGlobalSegment(const GlobalValue *GV)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:204
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:219
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:205
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:231
std::optional< unsigned > getPKFMACF16InlineEncoding(uint32_t Literal, bool IsGFX11Plus)
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo *STI)
bool isNotGFX9Plus(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
StringLiteral getInstCounterName(InstCounterType T)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int32_t getSOPPWithRelaxation(uint32_t Opcode)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)
bool isGFX1250(const MCSubtargetInfo &STI)
bool supportsWave32(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool isTensorStore(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
bool getMUBUFIsBufferInv(unsigned Opc)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool hasDynamicVGPR(const Function &F)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
LLVM_READNONE constexpr bool isChainCC(CallingConv::ID CC)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool canGuaranteeTCO(CallingConv::ID CC)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, bool VOPD3)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1759
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
Definition MathExtras.h:207
@ Wait
Definition Threading.h:60
Op::Description Desc
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
@ Other
Any other memory.
Definition ModRef.h:68
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
AMD Kernel Code Object (amd_kernel_code_t).
constexpr EncodingField(ValueType Value)
static ValueType decode(uint64_t Encoded)
constexpr uint64_t encode() const
static constexpr uint64_t encode(Fields... Values)
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)
constexpr EncodingField(ValueType Value)
constexpr uint64_t encode() const
static ValueType decode(uint64_t Encoded)
Instruction set architecture version.