19#include "llvm/IR/IntrinsicsAMDGPU.h"
20#include "llvm/IR/IntrinsicsR600.h"
30#define GET_INSTRINFO_NAMED_OPS
31#define GET_INSTRMAP_INFO
32#include "AMDGPUGenInstrInfo.inc"
/// Returns a mask of \p Width consecutive one bits starting at bit position
/// \p Shift, e.g. getBitMask(4, 2) == 0x30.
///
/// Uses an unsigned literal so the shift is well defined for Width == 31
/// (a signed `1 << 31` is implementation-defined/UB before C++20), and
/// handles Width == 32 explicitly because `1u << 32` is undefined behavior.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  assert(Shift < 32 && Width <= 32 && "bit field out of range");
  if (Width == 32)
    return ~0u << Shift;
  return ((1u << Width) - 1) << Shift;
}
/// Writes the low \p Width bits of \p Src into the bit field of \p Dst that
/// starts at bit position \p Shift, leaving every other bit of \p Dst intact,
/// and returns the combined value.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  // Inlined getBitMask: Width consecutive one bits starting at bit Shift.
  const unsigned FieldMask = ((1 << Width) - 1) << Shift;
  const unsigned Preserved = Dst & ~FieldMask;
  const unsigned Inserted = (Src << Shift) & FieldMask;
  return Preserved | Inserted;
}
/// Extracts the \p Width-bit field that starts at bit position \p Shift from
/// \p Src and returns it right-aligned (shifted down to bit 0).
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  // Inlined getBitMask: Width consecutive one bits starting at bit Shift.
  const unsigned FieldMask = ((1 << Width) - 1) << Shift;
  return (Src & FieldMask) >> Shift;
}
62unsigned getVmcntBitShiftLo(
unsigned VersionMajor) {
67unsigned getVmcntBitWidthLo(
unsigned VersionMajor) {
72unsigned getExpcntBitShift(
unsigned VersionMajor) {
/// Number of bits used to encode the expcnt field of a waitcnt value.
/// The width is the same (3 bits) for every \p VersionMajor handled here; the
/// parameter exists for symmetry with the other waitcnt bit-layout helpers.
unsigned getExpcntBitWidth(unsigned VersionMajor) {
  const unsigned ExpcntWidth = 3;
  return ExpcntWidth;
}
80unsigned getLgkmcntBitShift(
unsigned VersionMajor) {
85unsigned getLgkmcntBitWidth(
unsigned VersionMajor) {
/// Bit position at which the high part of the vmcnt field starts within a
/// waitcnt encoding. The position is fixed (bit 14) regardless of
/// \p VersionMajor; whether a high part exists at all is determined by
/// getVmcntBitWidthHi().
unsigned getVmcntBitShiftHi(unsigned VersionMajor) {
  const unsigned VmcntHiShift = 14;
  return VmcntHiShift;
}
/// Width in bits of the high part of the vmcnt field of a waitcnt encoding.
/// Only major versions 9 and 10 carry two extra high vmcnt bits; every other
/// generation keeps vmcnt entirely in the low part (width 0 here).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  switch (VersionMajor) {
  case 9:
  case 10:
    return 2;
  default:
    return 0;
  }
}
156 if (
auto Ver = mdconst::extract_or_null<ConstantInt>(
157 M.getModuleFlag(
"amdgpu_code_object_version"))) {
158 return (
unsigned)Ver->getZExtValue() / 100;
166 switch (CodeObjectVersion) {
181 switch (CodeObjectVersion) {
193 switch (CodeObjectVersion) {
205 switch (CodeObjectVersion) {
216#define GET_MIMGBaseOpcodesTable_IMPL
217#define GET_MIMGDimInfoTable_IMPL
218#define GET_MIMGInfoTable_IMPL
219#define GET_MIMGLZMappingTable_IMPL
220#define GET_MIMGMIPMappingTable_IMPL
221#define GET_MIMGBiasMappingTable_IMPL
222#define GET_MIMGOffsetMappingTable_IMPL
223#define GET_MIMGG16MappingTable_IMPL
224#define GET_MAIInstInfoTable_IMPL
225#include "AMDGPUGenSearchableTables.inc"
228 unsigned VDataDwords,
unsigned VAddrDwords) {
229 const MIMGInfo *
Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
230 VDataDwords, VAddrDwords);
244 return NewInfo ? NewInfo->
Opcode : -1;
249 bool IsG16Supported) {
256 AddrWords += AddrComponents;
264 if ((IsA16 && !IsG16Supported) || BaseOpcode->
G16)
325#define GET_MTBUFInfoTable_DECL
326#define GET_MTBUFInfoTable_IMPL
327#define GET_MUBUFInfoTable_DECL
328#define GET_MUBUFInfoTable_IMPL
329#define GET_SMInfoTable_DECL
330#define GET_SMInfoTable_IMPL
331#define GET_VOP1InfoTable_DECL
332#define GET_VOP1InfoTable_IMPL
333#define GET_VOP2InfoTable_DECL
334#define GET_VOP2InfoTable_IMPL
335#define GET_VOP3InfoTable_DECL
336#define GET_VOP3InfoTable_IMPL
337#define GET_VOPC64DPPTable_DECL
338#define GET_VOPC64DPPTable_IMPL
339#define GET_VOPC64DPP8Table_DECL
340#define GET_VOPC64DPP8Table_IMPL
341#define GET_VOPDComponentTable_DECL
342#define GET_VOPDComponentTable_IMPL
343#define GET_VOPDPairs_DECL
344#define GET_VOPDPairs_IMPL
345#define GET_VOPTrue16Table_DECL
346#define GET_VOPTrue16Table_IMPL
347#define GET_WMMAOpcode2AddrMappingTable_DECL
348#define GET_WMMAOpcode2AddrMappingTable_IMPL
349#define GET_WMMAOpcode3AddrMappingTable_DECL
350#define GET_WMMAOpcode3AddrMappingTable_IMPL
351#include "AMDGPUGenSearchableTables.inc"
355 return Info ?
Info->BaseOpcode : -1;
359 const MTBUFInfo *
Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
370 return Info ?
Info->has_vaddr :
false;
375 return Info ?
Info->has_srsrc :
false;
380 return Info ?
Info->has_soffset :
false;
385 return Info ?
Info->BaseOpcode : -1;
389 const MUBUFInfo *
Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
400 return Info ?
Info->has_vaddr :
false;
405 return Info ?
Info->has_srsrc :
false;
410 return Info ?
Info->has_soffset :
false;
415 return Info ?
Info->IsBufferInv :
false;
419 const SMInfo *
Info = getSMEMOpcodeHelper(Opc);
420 return Info ?
Info->IsBuffer :
false;
425 return Info ?
Info->IsSingle :
false;
430 return Info ?
Info->IsSingle :
false;
435 return Info ?
Info->IsSingle :
false;
439 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
444 return Info ?
Info->is_dgemm :
false;
449 return Info ?
Info->is_gfx940_xdl :
false;
455 return {
Info->CanBeVOPDX,
true};
457 return {
false,
false};
470 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
471 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
472 Opc == AMDGPU::V_MAC_F32_e64_vi ||
473 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
474 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
475 Opc == AMDGPU::V_MAC_F16_e64_vi ||
476 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
477 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
478 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
479 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
480 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
481 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
482 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
483 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
484 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
485 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
486 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
487 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
491 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
492 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
493 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
494 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11;
499 return Info ?
Info->IsTrue16 :
false;
504 return Info ?
Info->Opcode3Addr : ~0u;
509 return Info ?
Info->Opcode2Addr : ~0u;
516 return getMCOpcodeGen(Opcode,
static_cast<Subtarget
>(Gen));
520 const VOPDInfo *
Info = getVOPDInfoFromComponentOpcodes(OpX, OpY);
527 auto OpX = getVOPDBaseFromComponent(
Info->OpX);
528 auto OpY = getVOPDBaseFromComponent(
Info->OpY);
530 return {OpX->BaseVOP, OpY->BaseVOP};
542 HasSrc2Acc = TiedIdx != -1;
549 for (CompOprIdx =
Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
551 MandatoryLiteralIdx = CompOprIdx;
572 std::function<
unsigned(
unsigned,
unsigned)> GetRegIdx)
const {
579 unsigned BanksNum =
BANKS_NUM[CompOprIdx];
580 if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
581 (OpXRegs[CompOprIdx] % BanksNum == OpYRegs[CompOprIdx] % BanksNum))
597 std::function<
unsigned(
unsigned,
unsigned)> GetRegIdx)
const {
600 const auto &Comp = CompInfo[CompIdx];
603 RegIndices[
DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
606 unsigned CompSrcIdx = CompOprIdx -
DST_NUM;
608 Comp.hasRegSrcOperand(CompSrcIdx)
609 ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
624 const auto &OpXDesc =
InstrInfo->get(OpX);
625 const auto &OpYDesc =
InstrInfo->get(OpY);
647 std::optional<bool> XnackRequested;
648 std::optional<bool> SramEccRequested;
650 for (
const std::string &Feature : Features.
getFeatures()) {
651 if (Feature ==
"+xnack")
652 XnackRequested =
true;
653 else if (Feature ==
"-xnack")
654 XnackRequested =
false;
655 else if (Feature ==
"+sramecc")
656 SramEccRequested =
true;
657 else if (Feature ==
"-sramecc")
658 SramEccRequested =
false;
664 if (XnackRequested) {
665 if (XnackSupported) {
671 if (*XnackRequested) {
672 errs() <<
"warning: xnack 'On' was requested for a processor that does "
675 errs() <<
"warning: xnack 'Off' was requested for a processor that "
676 "does not support it!\n";
681 if (SramEccRequested) {
682 if (SramEccSupported) {
689 if (*SramEccRequested) {
690 errs() <<
"warning: sramecc 'On' was requested for a processor that "
691 "does not support it!\n";
693 errs() <<
"warning: sramecc 'Off' was requested for a processor that "
694 "does not support it!\n";
712 TargetID.
split(TargetIDSplit,
':');
714 for (
const auto &FeatureString : TargetIDSplit) {
715 if (FeatureString.startswith(
"xnack"))
717 if (FeatureString.startswith(
"sramecc"))
723 std::string StringRep;
729 StreamRep << TargetTriple.getArchName() <<
'-'
730 << TargetTriple.getVendorName() <<
'-'
731 << TargetTriple.getOSName() <<
'-'
732 << TargetTriple.getEnvironmentName() <<
'-';
734 std::string Processor;
738 if (Version.Major >= 9)
741 Processor = (
Twine(
"gfx") +
Twine(Version.Major) +
Twine(Version.Minor) +
742 Twine(Version.Stepping))
745 std::string Features;
747 switch (CodeObjectVersion) {
751 if (Processor ==
"gfx600") {
752 }
else if (Processor ==
"gfx601") {
753 }
else if (Processor ==
"gfx602") {
754 }
else if (Processor ==
"gfx700") {
755 }
else if (Processor ==
"gfx701") {
756 }
else if (Processor ==
"gfx702") {
757 }
else if (Processor ==
"gfx703") {
758 }
else if (Processor ==
"gfx704") {
759 }
else if (Processor ==
"gfx705") {
760 }
else if (Processor ==
"gfx801") {
763 "AMD GPU code object V2 does not support processor " +
764 Twine(Processor) +
" without XNACK");
765 }
else if (Processor ==
"gfx802") {
766 }
else if (Processor ==
"gfx803") {
767 }
else if (Processor ==
"gfx805") {
768 }
else if (Processor ==
"gfx810") {
771 "AMD GPU code object V2 does not support processor " +
772 Twine(Processor) +
" without XNACK");
773 }
else if (Processor ==
"gfx900") {
775 Processor =
"gfx901";
776 }
else if (Processor ==
"gfx902") {
778 Processor =
"gfx903";
779 }
else if (Processor ==
"gfx904") {
781 Processor =
"gfx905";
782 }
else if (Processor ==
"gfx906") {
784 Processor =
"gfx907";
785 }
else if (Processor ==
"gfx90c") {
788 "AMD GPU code object V2 does not support processor " +
789 Twine(Processor) +
" with XNACK being ON or ANY");
792 "AMD GPU code object V2 does not support processor " +
799 Features +=
"+xnack";
803 Features +=
"+sram-ecc";
809 Features +=
":sramecc-";
811 Features +=
":sramecc+";
814 Features +=
":xnack-";
816 Features +=
":xnack+";
823 StreamRep << Processor << Features;
839 unsigned BytesPerCU = 0;
874 unsigned FlatWorkGroupSize) {
875 assert(FlatWorkGroupSize != 0);
885 unsigned MaxBarriers = 16;
889 return std::min(MaxWaves /
N, MaxBarriers);
906 unsigned FlatWorkGroupSize) {
921 unsigned FlatWorkGroupSize) {
927 if (Version.Major >= 10)
929 if (Version.Major >= 8)
940 if (Version.Major >= 8)
950 if (Version.Major >= 10)
952 if (Version.Major >= 8)
961 if (Version.Major >= 10)
980 if (Version.Major >= 10)
981 return Addressable ? AddressableNumSGPRs : 108;
982 if (Version.Major >= 8 && !Addressable)
983 AddressableNumSGPRs = 112;
988 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
992 bool FlatScrUsed,
bool XNACKUsed) {
993 unsigned ExtraSGPRs = 0;
998 if (Version.Major >= 10)
1001 if (Version.Major < 8) {
1029 std::optional<bool> EnableWavefrontSize32) {
1033 bool IsWave32 = EnableWavefrontSize32 ?
1034 *EnableWavefrontSize32 :
1038 return IsWave32 ? 24 : 12;
1041 return IsWave32 ? 16 : 8;
1043 return IsWave32 ? 8 : 4;
1047 std::optional<bool> EnableWavefrontSize32) {
1051 bool IsWave32 = EnableWavefrontSize32 ?
1052 *EnableWavefrontSize32 :
1055 return IsWave32 ? 8 : 4;
1065 return IsWave32 ? 1536 : 768;
1066 return IsWave32 ? 1024 : 512;
1076 unsigned NumVGPRs) {
1079 if (NumVGPRs < Granule)
1081 unsigned RoundedRegs =
alignTo(NumVGPRs, Granule);
1082 return std::min(std::max(
getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
1089 if (WavesPerEU >= MaxWavesPerEU)
1095 unsigned MaxNumVGPRs =
alignDown(TotNumVGPRs / WavesPerEU, Granule);
1097 if (MaxNumVGPRs ==
alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1101 if (WavesPerEU < MinWavesPerEU)
1104 unsigned MaxNumVGPRsNext =
alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1105 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1106 return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1115 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1119 std::optional<bool> EnableWavefrontSize32) {
1120 NumVGPRs =
alignTo(std::max(1u, NumVGPRs),
1132 memset(&Header, 0,
sizeof(Header));
1134 Header.amd_kernel_code_version_major = 1;
1135 Header.amd_kernel_code_version_minor = 2;
1136 Header.amd_machine_kind = 1;
1137 Header.amd_machine_version_major = Version.Major;
1138 Header.amd_machine_version_minor = Version.Minor;
1139 Header.amd_machine_version_stepping = Version.Stepping;
1140 Header.kernel_code_entry_byte_offset =
sizeof(Header);
1141 Header.wavefront_size = 6;
1145 Header.call_convention = -1;
1149 Header.kernarg_segment_alignment = 4;
1150 Header.group_segment_alignment = 4;
1151 Header.private_segment_alignment = 4;
1153 if (Version.Major >= 10) {
1155 Header.wavefront_size = 5;
1158 Header.compute_pgm_resource_registers |=
1169 memset(&KD, 0,
sizeof(KD));
1172 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
1175 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
1177 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
1179 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
1180 if (Version.Major >= 10) {
1182 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
1185 amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
1188 amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
1192 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1219 bool OnlyFirstRequired) {
1221 if (!
A.isStringAttribute())
1225 std::pair<int, int> Ints =
Default;
1226 std::pair<StringRef, StringRef> Strs =
A.getValueAsString().split(
',');
1227 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1228 Ctx.
emitError(
"can't parse first integer attribute " +
Name);
1231 if (Strs.second.trim().getAsInteger(0, Ints.second)) {
1232 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1233 Ctx.
emitError(
"can't parse second integer attribute " +
Name);
1242 return (1 << (getVmcntBitWidthLo(Version.Major) +
1243 getVmcntBitWidthHi(Version.Major))) -
1248 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1252 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1256 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1257 getVmcntBitWidthLo(Version.Major));
1258 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1259 getExpcntBitWidth(Version.Major));
1260 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1261 getLgkmcntBitWidth(Version.Major));
1262 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1263 getVmcntBitWidthHi(Version.Major));
1264 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1268 unsigned VmcntLo = unpackBits(
Waitcnt, getVmcntBitShiftLo(Version.Major),
1269 getVmcntBitWidthLo(Version.Major));
1270 unsigned VmcntHi = unpackBits(
Waitcnt, getVmcntBitShiftHi(Version.Major),
1271 getVmcntBitWidthHi(Version.Major));
1272 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1276 return unpackBits(
Waitcnt, getExpcntBitShift(Version.Major),
1277 getExpcntBitWidth(Version.Major));
1281 return unpackBits(
Waitcnt, getLgkmcntBitShift(Version.Major),
1282 getLgkmcntBitWidth(Version.Major));
1286 unsigned &Vmcnt,
unsigned &Expcnt,
unsigned &Lgkmcnt) {
1302 Waitcnt = packBits(Vmcnt,
Waitcnt, getVmcntBitShiftLo(Version.Major),
1303 getVmcntBitWidthLo(Version.Major));
1304 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major),
Waitcnt,
1305 getVmcntBitShiftHi(Version.Major),
1306 getVmcntBitWidthHi(Version.Major));
1311 return packBits(Expcnt,
Waitcnt, getExpcntBitShift(Version.Major),
1312 getExpcntBitWidth(Version.Major));
1317 return packBits(Lgkmcnt,
Waitcnt, getLgkmcntBitShift(Version.Major),
1318 getLgkmcntBitWidth(Version.Major));
1322 unsigned Vmcnt,
unsigned Expcnt,
unsigned Lgkmcnt) {
1357 for (
int Idx = 0;
Idx < OpInfoSize; ++
Idx) {
1369 int OpInfoSize,
T Context) {
1371 return getOprIdx<T>(
Test, OpInfo, OpInfoSize,
Context);
1376 T Context,
bool QuickCheck =
true) {
1378 return Op.Encoding == Id && !Op.Name.empty();
1383 if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize,
Context) &&
1387 return getOprIdx<T>(
Test, OpInfo, OpInfoSize,
Context);
1399 const auto &Op = Opr[
Idx];
1400 if (Op.isSupported(STI))
1401 Enc |= Op.encode(Op.Default);
1407 int Size,
unsigned Code,
1408 bool &HasNonDefaultVal,
1410 unsigned UsedOprMask = 0;
1411 HasNonDefaultVal =
false;
1413 const auto &Op = Opr[
Idx];
1414 if (!Op.isSupported(STI))
1416 UsedOprMask |= Op.getMask();
1417 unsigned Val = Op.decode(Code);
1418 if (!Op.isValid(Val))
1420 HasNonDefaultVal |= (Val != Op.Default);
1422 return (Code & ~UsedOprMask) == 0;
1427 unsigned &Val,
bool &IsDefault,
1430 const auto &Op = Opr[
Idx++];
1431 if (Op.isSupported(STI)) {
1433 Val = Op.decode(Code);
1434 IsDefault = (Val == Op.Default);
1444 if (InputVal < 0 || InputVal > Op.Max)
1446 return Op.encode(InputVal);
1451 unsigned &UsedOprMask,
1455 const auto &Op = Opr[
Idx];
1456 if (Op.Name ==
Name) {
1457 if (!Op.isSupported(STI)) {
1461 auto OprMask = Op.getMask();
1462 if (OprMask & UsedOprMask)
1464 UsedOprMask |= OprMask;
1487 HasNonDefaultVal, STI);
1516 return 0 <=
Id && isUInt<ID_WIDTH_>(
Id);
1524 return 0 <= (
Width - 1) && isUInt<WIDTH_M1_WIDTH_>(
Width - 1);
1570 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1571 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1582 if (Val.MaxIndex == 0 &&
Name == Val.Name)
1585 if (Val.MaxIndex > 0 &&
Name.startswith(Val.Name)) {
1593 if (Suffix.
size() > 1 && Suffix[0] ==
'0')
1596 return Val.Tgt + Id;
1625namespace MTBUFFormat {
1651 if (
Name == lookupTable[Id])
1756 int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId,
Msg,
MSG_SIZE, STI);
1764 for (
int i =
F; i < L; ++i) {
1777 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1856 return F.getFnAttributeAsParsedInteger(
"InitialPSInputAddr", 0);
1861 return F.getFnAttributeAsParsedInteger(
1862 "amdgpu-color-export",
1867 return F.getFnAttributeAsParsedInteger(
"amdgpu-depth-export", 0) != 0;
1928 return STI.
hasFeature(AMDGPU::FeatureSRAMECC);
1944 return !STI.
hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !
isCI(STI) &&
1950 if (Version.Major == 10)
1951 return Version.Minor >= 3 ? 13 : 5;
1952 if (Version.Major == 11)
1958 return STI.
hasFeature(AMDGPU::FeatureSouthernIslands);
1962 return STI.
hasFeature(AMDGPU::FeatureSeaIslands);
1966 return STI.
hasFeature(AMDGPU::FeatureVolcanicIslands);
2018 return STI.
hasFeature(AMDGPU::FeatureGCN3Encoding);
2022 return STI.
hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2026 return STI.
hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2030 return STI.
hasFeature(AMDGPU::FeatureGFX10_3Insts);
2034 return STI.
hasFeature(AMDGPU::FeatureGFX90AInsts);
2038 return STI.
hasFeature(AMDGPU::FeatureGFX940Insts);
2042 return STI.
hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2046 return STI.
hasFeature(AMDGPU::FeatureMAIInsts);
2054 int32_t ArgNumVGPR) {
2055 if (has90AInsts && ArgNumAGPR)
2056 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2057 return std::max(ArgNumVGPR, ArgNumAGPR);
2062 const unsigned FirstSubReg =
TRI->getSubReg(
Reg, AMDGPU::sub0);
2063 return SGPRClass.
contains(FirstSubReg != 0 ? FirstSubReg :
Reg) ||
2067#define MAP_REG2REG \
2068 using namespace AMDGPU; \
2070 default: return Reg; \
2071 CASE_CI_VI(FLAT_SCR) \
2072 CASE_CI_VI(FLAT_SCR_LO) \
2073 CASE_CI_VI(FLAT_SCR_HI) \
2074 CASE_VI_GFX9PLUS(TTMP0) \
2075 CASE_VI_GFX9PLUS(TTMP1) \
2076 CASE_VI_GFX9PLUS(TTMP2) \
2077 CASE_VI_GFX9PLUS(TTMP3) \
2078 CASE_VI_GFX9PLUS(TTMP4) \
2079 CASE_VI_GFX9PLUS(TTMP5) \
2080 CASE_VI_GFX9PLUS(TTMP6) \
2081 CASE_VI_GFX9PLUS(TTMP7) \
2082 CASE_VI_GFX9PLUS(TTMP8) \
2083 CASE_VI_GFX9PLUS(TTMP9) \
2084 CASE_VI_GFX9PLUS(TTMP10) \
2085 CASE_VI_GFX9PLUS(TTMP11) \
2086 CASE_VI_GFX9PLUS(TTMP12) \
2087 CASE_VI_GFX9PLUS(TTMP13) \
2088 CASE_VI_GFX9PLUS(TTMP14) \
2089 CASE_VI_GFX9PLUS(TTMP15) \
2090 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2091 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2092 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2093 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2094 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2095 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2096 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2097 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2098 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2099 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2100 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2101 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2102 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2103 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2104 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2105 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2106 CASE_GFXPRE11_GFX11PLUS(M0) \
2107 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2108 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2111#define CASE_CI_VI(node) \
2112 assert(!isSI(STI)); \
2113 case node: return isCI(STI) ? node##_ci : node##_vi;
2115#define CASE_VI_GFX9PLUS(node) \
2116 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2118#define CASE_GFXPRE11_GFX11PLUS(node) \
2119 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2121#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2122 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2131#undef CASE_VI_GFX9PLUS
2132#undef CASE_GFXPRE11_GFX11PLUS
2133#undef CASE_GFXPRE11_GFX11PLUS_TO
2135#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2136#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2137#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2138#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2146 case AMDGPU::SRC_SHARED_BASE_LO:
2147 case AMDGPU::SRC_SHARED_BASE:
2148 case AMDGPU::SRC_SHARED_LIMIT_LO:
2149 case AMDGPU::SRC_SHARED_LIMIT:
2150 case AMDGPU::SRC_PRIVATE_BASE_LO:
2151 case AMDGPU::SRC_PRIVATE_BASE:
2152 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2153 case AMDGPU::SRC_PRIVATE_LIMIT:
2154 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2156 case AMDGPU::SRC_VCCZ:
2157 case AMDGPU::SRC_EXECZ:
2158 case AMDGPU::SRC_SCC:
2160 case AMDGPU::SGPR_NULL:
2168#undef CASE_VI_GFX9PLUS
2169#undef CASE_GFXPRE11_GFX11PLUS
2170#undef CASE_GFXPRE11_GFX11PLUS_TO
2175 unsigned OpType = Desc.
operands()[OpNo].OperandType;
2182 unsigned OpType = Desc.
operands()[OpNo].OperandType;
2189 unsigned OpType = Desc.
operands()[OpNo].OperandType;
2218 unsigned OpType = Desc.
operands()[OpNo].OperandType;
2227 case AMDGPU::VGPR_LO16RegClassID:
2228 case AMDGPU::VGPR_HI16RegClassID:
2229 case AMDGPU::SGPR_LO16RegClassID:
2230 case AMDGPU::AGPR_LO16RegClassID:
2232 case AMDGPU::SGPR_32RegClassID:
2233 case AMDGPU::VGPR_32RegClassID:
2234 case AMDGPU::VRegOrLds_32RegClassID:
2235 case AMDGPU::AGPR_32RegClassID:
2236 case AMDGPU::VS_32RegClassID:
2237 case AMDGPU::AV_32RegClassID:
2238 case AMDGPU::SReg_32RegClassID:
2239 case AMDGPU::SReg_32_XM0RegClassID:
2240 case AMDGPU::SRegOrLds_32RegClassID:
2242 case AMDGPU::SGPR_64RegClassID:
2243 case AMDGPU::VS_64RegClassID:
2244 case AMDGPU::SReg_64RegClassID:
2245 case AMDGPU::VReg_64RegClassID:
2246 case AMDGPU::AReg_64RegClassID:
2247 case AMDGPU::SReg_64_XEXECRegClassID:
2248 case AMDGPU::VReg_64_Align2RegClassID:
2249 case AMDGPU::AReg_64_Align2RegClassID:
2250 case AMDGPU::AV_64RegClassID:
2251 case AMDGPU::AV_64_Align2RegClassID:
2253 case AMDGPU::SGPR_96RegClassID:
2254 case AMDGPU::SReg_96RegClassID:
2255 case AMDGPU::VReg_96RegClassID:
2256 case AMDGPU::AReg_96RegClassID:
2257 case AMDGPU::VReg_96_Align2RegClassID:
2258 case AMDGPU::AReg_96_Align2RegClassID:
2259 case AMDGPU::AV_96RegClassID:
2260 case AMDGPU::AV_96_Align2RegClassID:
2262 case AMDGPU::SGPR_128RegClassID:
2263 case AMDGPU::SReg_128RegClassID:
2264 case AMDGPU::VReg_128RegClassID:
2265 case AMDGPU::AReg_128RegClassID:
2266 case AMDGPU::VReg_128_Align2RegClassID:
2267 case AMDGPU::AReg_128_Align2RegClassID:
2268 case AMDGPU::AV_128RegClassID:
2269 case AMDGPU::AV_128_Align2RegClassID:
2271 case AMDGPU::SGPR_160RegClassID:
2272 case AMDGPU::SReg_160RegClassID:
2273 case AMDGPU::VReg_160RegClassID:
2274 case AMDGPU::AReg_160RegClassID:
2275 case AMDGPU::VReg_160_Align2RegClassID:
2276 case AMDGPU::AReg_160_Align2RegClassID:
2277 case AMDGPU::AV_160RegClassID:
2278 case AMDGPU::AV_160_Align2RegClassID:
2280 case AMDGPU::SGPR_192RegClassID:
2281 case AMDGPU::SReg_192RegClassID:
2282 case AMDGPU::VReg_192RegClassID:
2283 case AMDGPU::AReg_192RegClassID:
2284 case AMDGPU::VReg_192_Align2RegClassID:
2285 case AMDGPU::AReg_192_Align2RegClassID:
2286 case AMDGPU::AV_192RegClassID:
2287 case AMDGPU::AV_192_Align2RegClassID:
2289 case AMDGPU::SGPR_224RegClassID:
2290 case AMDGPU::SReg_224RegClassID:
2291 case AMDGPU::VReg_224RegClassID:
2292 case AMDGPU::AReg_224RegClassID:
2293 case AMDGPU::VReg_224_Align2RegClassID:
2294 case AMDGPU::AReg_224_Align2RegClassID:
2295 case AMDGPU::AV_224RegClassID:
2296 case AMDGPU::AV_224_Align2RegClassID:
2298 case AMDGPU::SGPR_256RegClassID:
2299 case AMDGPU::SReg_256RegClassID:
2300 case AMDGPU::VReg_256RegClassID:
2301 case AMDGPU::AReg_256RegClassID:
2302 case AMDGPU::VReg_256_Align2RegClassID:
2303 case AMDGPU::AReg_256_Align2RegClassID:
2304 case AMDGPU::AV_256RegClassID:
2305 case AMDGPU::AV_256_Align2RegClassID:
2307 case AMDGPU::SGPR_288RegClassID:
2308 case AMDGPU::SReg_288RegClassID:
2309 case AMDGPU::VReg_288RegClassID:
2310 case AMDGPU::AReg_288RegClassID:
2311 case AMDGPU::VReg_288_Align2RegClassID:
2312 case AMDGPU::AReg_288_Align2RegClassID:
2313 case AMDGPU::AV_288RegClassID:
2314 case AMDGPU::AV_288_Align2RegClassID:
2316 case AMDGPU::SGPR_320RegClassID:
2317 case AMDGPU::SReg_320RegClassID:
2318 case AMDGPU::VReg_320RegClassID:
2319 case AMDGPU::AReg_320RegClassID:
2320 case AMDGPU::VReg_320_Align2RegClassID:
2321 case AMDGPU::AReg_320_Align2RegClassID:
2322 case AMDGPU::AV_320RegClassID:
2323 case AMDGPU::AV_320_Align2RegClassID:
2325 case AMDGPU::SGPR_352RegClassID:
2326 case AMDGPU::SReg_352RegClassID:
2327 case AMDGPU::VReg_352RegClassID:
2328 case AMDGPU::AReg_352RegClassID:
2329 case AMDGPU::VReg_352_Align2RegClassID:
2330 case AMDGPU::AReg_352_Align2RegClassID:
2331 case AMDGPU::AV_352RegClassID:
2332 case AMDGPU::AV_352_Align2RegClassID:
2334 case AMDGPU::SGPR_384RegClassID:
2335 case AMDGPU::SReg_384RegClassID:
2336 case AMDGPU::VReg_384RegClassID:
2337 case AMDGPU::AReg_384RegClassID:
2338 case AMDGPU::VReg_384_Align2RegClassID:
2339 case AMDGPU::AReg_384_Align2RegClassID:
2340 case AMDGPU::AV_384RegClassID:
2341 case AMDGPU::AV_384_Align2RegClassID:
2343 case AMDGPU::SGPR_512RegClassID:
2344 case AMDGPU::SReg_512RegClassID:
2345 case AMDGPU::VReg_512RegClassID:
2346 case AMDGPU::AReg_512RegClassID:
2347 case AMDGPU::VReg_512_Align2RegClassID:
2348 case AMDGPU::AReg_512_Align2RegClassID:
2349 case AMDGPU::AV_512RegClassID:
2350 case AMDGPU::AV_512_Align2RegClassID:
2352 case AMDGPU::SGPR_1024RegClassID:
2353 case AMDGPU::SReg_1024RegClassID:
2354 case AMDGPU::VReg_1024RegClassID:
2355 case AMDGPU::AReg_1024RegClassID:
2356 case AMDGPU::VReg_1024_Align2RegClassID:
2357 case AMDGPU::AReg_1024_Align2RegClassID:
2358 case AMDGPU::AV_1024RegClassID:
2359 case AMDGPU::AV_1024_Align2RegClassID:
2373 unsigned RCID = Desc.
operands()[OpNo].RegClass;
2382 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2383 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2384 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2385 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2386 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2387 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2388 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2389 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2390 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2391 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2408 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2409 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2410 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2411 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2412 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2413 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2414 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2415 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2416 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2417 (Val == 0x3e22f983 && HasInv2Pi);
2428 return Val == 0x3C00 ||
2443 int16_t Trunc =
static_cast<int16_t
>(
Literal);
2449 int16_t Lo16 =
static_cast<int16_t
>(
Literal);
2450 int16_t Hi16 =
static_cast<int16_t
>(
Literal >> 16);
2455 int16_t Lo16 =
static_cast<int16_t
>(
Literal);
2459 int16_t Hi16 =
static_cast<int16_t
>(
Literal >> 16);
2468 int16_t Lo16 =
static_cast<int16_t
>(
Literal);
2472 int16_t Hi16 =
static_cast<int16_t
>(
Literal >> 16);
2475 return Lo16 == Hi16;
2497 return A->hasAttribute(Attribute::InReg) ||
2498 A->hasAttribute(Attribute::ByVal);
2539 int64_t EncodedOffset) {
2541 : isUInt<8>(EncodedOffset);
2545 int64_t EncodedOffset,
2549 isInt<21>(EncodedOffset);
2553 return (ByteOffset & 3) == 0;
2562 return ByteOffset >> 2;
2566 int64_t ByteOffset,
bool IsBuffer) {
2570 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2575 return std::nullopt;
2579 ? std::optional<int64_t>(EncodedOffset)
2584 int64_t ByteOffset) {
2586 return std::nullopt;
2589 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2603struct SourceOfDivergence {
2606const SourceOfDivergence *lookupSourceOfDivergence(
unsigned Intr);
2613#define GET_SourcesOfDivergence_IMPL
2614#define GET_UniformIntrinsics_IMPL
2615#define GET_Gfx9BufferFormat_IMPL
2616#define GET_Gfx10BufferFormat_IMPL
2617#define GET_Gfx11PlusBufferFormat_IMPL
2618#include "AMDGPUGenSearchableTables.inc"
2623 return lookupSourceOfDivergence(IntrID);
2627 return lookupAlwaysUniform(IntrID);
2631 uint8_t NumComponents,
2635 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2637 :
isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2638 NumComponents, NumFormat)
2639 : getGfx9BufferFormatInfo(BitsPerComp,
2640 NumComponents, NumFormat);
2647 : getGfx9BufferFormatInfo(
Format);
2656 OS <<
"Unsupported";
unsigned const MachineRegisterInfo * MRI
static llvm::cl::opt< unsigned > AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4))
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_SET(DST, MSK, VAL)
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
unsigned const TargetRegisterInfo * TRI
#define S_00B848_MEM_ORDERED(x)
#define S_00B848_WGP_MODE(x)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isSramEccSupported() const
void setTargetIDFromFeaturesString(StringRef FS)
bool isSramEccOnOrAny() const
TargetIDSetting getXnackSetting() const
bool isXnackOnOrAny() const
AMDGPUTargetID(const MCSubtargetInfo &STI)
bool isXnackSupported() const
void setTargetIDFromTargetIDStream(StringRef TargetID)
std::string toString() const
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfDstInParsedOperands() const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
unsigned getCompParsedSrcOperandsNum() const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< unsigned(unsigned, unsigned)> GetRegIdx) const
std::array< unsigned, Component::MAX_OPR_NUM > RegIndices
This class represents an incoming formal argument to a Function.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
CallingConv::ID getCallingConv() const
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned short NumOperands
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Interface to description of machine instruction set.
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
A Module instance is used to store all the information related to an LLVM module.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
bool endswith(StringRef Suffix) const
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
OSType getOS() const
Get the parsed operating system type of this triple.
ArchType getArch() const
Get the parsed architecture type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
@ ET_DUAL_SRC_BLEND_MAX_IDX
constexpr uint32_t VersionMajor
HSA metadata major version.
bool isValidHwreg(int64_t Id)
const CustomOperand< const MCSubtargetInfo & > Opr[]
bool isValidHwregOffset(int64_t Offset)
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width)
bool isValidHwregWidth(int64_t Width)
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI)
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI)
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width)
@ COMPLETION_ACTION_OFFSET
@ MULTIGRID_SYNC_ARG_OFFSET
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
int64_t getMsgOpId(int64_t MsgId, const StringRef Name)
const char *const OpGsSymbolic[OP_GS_LAST_]
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
const char *const OpSysSymbolic[OP_SYS_LAST_]
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
const CustomOperand< const MCSubtargetInfo & > Msg[]
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned COMPONENTS_NUM
constexpr unsigned BANKS_NUM[]
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI)
int getVOPDFull(unsigned OpX, unsigned OpY)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isHsaAbiVersion2(const MCSubtargetInfo *STI)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isHsaAbiVersion5(const MCSubtargetInfo *STI)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isInlinableIntLiteralV216(int32_t Literal)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isHsaAbiVersion4(const MCSubtargetInfo *STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isHsaAbiVersion3(const MCSubtargetInfo *STI)
int getMTBUFElements(unsigned Opc)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
std::pair< int, int > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< int, int > Default, bool OnlyFirstRequired)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For FLAT segment the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
CanBeVOPD getCanBeVOPD(unsigned Opc)
static int getOprIdx(std::function< bool(const CustomOperand< T > &)> Test, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getCodeObjectVersion(const Module &M)
bool isGFX940(const MCSubtargetInfo &STI)
bool isEntryFunctionCC(CallingConv::ID CC)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGroupSegment(const GlobalValue *GV)
IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
bool hasXNACK(const MCSubtargetInfo &STI)
unsigned getVOPDOpcode(unsigned Opc)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isCompute(CallingConv::ID cc)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
bool isReadOnlySegment(const GlobalValue *GV)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getHasDepthExport(const Function &F)
static bool isValidOpr(int Idx, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
bool isTrue16Inst(unsigned Opc)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
unsigned getAmdhsaCodeObjectVersion()
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isKernelCC(const Function *Func)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isShader(CallingConv::ID cc)
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
bool isGlobalSegment(const GlobalValue *GV)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_LAST
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_C_FIRST
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
std::optional< uint8_t > getHsaAbiVersion(const MCSubtargetInfo *STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool getMUBUFIsBufferInv(unsigned Opc)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI)
bool getVOP2IsSingle(unsigned Opc)
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isModuleEntryFunctionCC(CallingConv::ID CC)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
bool isGraphics(CallingConv::ID cc)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ ELFABIVERSION_AMDGPU_HSA_V4
@ ELFABIVERSION_AMDGPU_HSA_V5
@ ELFABIVERSION_AMDGPU_HSA_V3
@ ELFABIVERSION_AMDGPU_HSA_V2
@ FLOAT_DENORM_MODE_FLUSH_NONE
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
@ AlwaysUniform
The result values are always uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
AMD Kernel Code Object (amd_kernel_code_t).
Instruction set architecture version.
Represents the counter values to wait for in an s_waitcnt instruction.
uint32_t compute_pgm_rsrc1
uint32_t compute_pgm_rsrc2
uint16_t kernel_code_properties
uint32_t compute_pgm_rsrc3